@jocmp/mercury-parser 3.0.8 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +730 -618
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +540 -370
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +2 -2
- package/dist/mercury.web.js.map +1 -1
- package/package.json +7 -12
|
@@ -23,7 +23,6 @@ var _Array$from = require('@babel/runtime-corejs2/core-js/array/from');
|
|
|
23
23
|
var _Symbol = require('@babel/runtime-corejs2/core-js/symbol');
|
|
24
24
|
var _Symbol$iterator = require('@babel/runtime-corejs2/core-js/symbol/iterator');
|
|
25
25
|
var _Array$isArray = require('@babel/runtime-corejs2/core-js/array/is-array');
|
|
26
|
-
var _typeof = require('@babel/runtime-corejs2/helpers/typeof');
|
|
27
26
|
var _Object$create = require('@babel/runtime-corejs2/core-js/object/create');
|
|
28
27
|
var _Object$freeze = require('@babel/runtime-corejs2/core-js/object/freeze');
|
|
29
28
|
var require$$7 = require('@babel/runtime-corejs2/helpers/objectWithoutProperties');
|
|
@@ -44,51 +43,6 @@ var require$$33 = require('wuzzy');
|
|
|
44
43
|
var require$$34 = require('difflib');
|
|
45
44
|
var _taggedTemplateLiteral = require('@babel/runtime-corejs2/helpers/taggedTemplateLiteral');
|
|
46
45
|
|
|
47
|
-
function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
|
|
48
|
-
|
|
49
|
-
var _slicedToArray__default = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
|
|
50
|
-
var _toConsumableArray__default = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
|
|
51
|
-
var fs__default = /*#__PURE__*/_interopDefaultLegacy(fs);
|
|
52
|
-
var inquirer__default = /*#__PURE__*/_interopDefaultLegacy(inquirer);
|
|
53
|
-
var ora__default = /*#__PURE__*/_interopDefaultLegacy(ora);
|
|
54
|
-
var _Reflect$ownKeys__default = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
|
|
55
|
-
var _parseInt__default = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
|
|
56
|
-
var URL__default = /*#__PURE__*/_interopDefaultLegacy(URL$1);
|
|
57
|
-
var _Object$keys__default = /*#__PURE__*/_interopDefaultLegacy(_Object$keys);
|
|
58
|
-
var _Object$getOwnPropertySymbols__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
|
|
59
|
-
var _Object$getOwnPropertyDescriptor__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
|
|
60
|
-
var _Object$getOwnPropertyDescriptors__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
|
|
61
|
-
var _Object$defineProperties__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
|
|
62
|
-
var _Object$defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
|
|
63
|
-
var _defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
|
|
64
|
-
var _parseFloat__default = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
|
|
65
|
-
var iconv__default = /*#__PURE__*/_interopDefaultLegacy(iconv);
|
|
66
|
-
var _Set__default = /*#__PURE__*/_interopDefaultLegacy(_Set);
|
|
67
|
-
var _Array$from__default = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
|
|
68
|
-
var _Symbol__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
|
|
69
|
-
var _Symbol$iterator__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol$iterator);
|
|
70
|
-
var _Array$isArray__default = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
|
|
71
|
-
var _typeof__default = /*#__PURE__*/_interopDefaultLegacy(_typeof);
|
|
72
|
-
var _Object$create__default = /*#__PURE__*/_interopDefaultLegacy(_Object$create);
|
|
73
|
-
var _Object$freeze__default = /*#__PURE__*/_interopDefaultLegacy(_Object$freeze);
|
|
74
|
-
var require$$7__default = /*#__PURE__*/_interopDefaultLegacy(require$$7);
|
|
75
|
-
var require$$8__default = /*#__PURE__*/_interopDefaultLegacy(require$$8);
|
|
76
|
-
var require$$9__default = /*#__PURE__*/_interopDefaultLegacy(require$$9);
|
|
77
|
-
var require$$11__default = /*#__PURE__*/_interopDefaultLegacy(require$$11);
|
|
78
|
-
var require$$12__default = /*#__PURE__*/_interopDefaultLegacy(require$$12);
|
|
79
|
-
var require$$16__default = /*#__PURE__*/_interopDefaultLegacy(require$$16);
|
|
80
|
-
var require$$17__default = /*#__PURE__*/_interopDefaultLegacy(require$$17);
|
|
81
|
-
var require$$26__default = /*#__PURE__*/_interopDefaultLegacy(require$$26);
|
|
82
|
-
var require$$27__default = /*#__PURE__*/_interopDefaultLegacy(require$$27);
|
|
83
|
-
var require$$28__default = /*#__PURE__*/_interopDefaultLegacy(require$$28);
|
|
84
|
-
var require$$29__default = /*#__PURE__*/_interopDefaultLegacy(require$$29);
|
|
85
|
-
var require$$30__default = /*#__PURE__*/_interopDefaultLegacy(require$$30);
|
|
86
|
-
var require$$31__default = /*#__PURE__*/_interopDefaultLegacy(require$$31);
|
|
87
|
-
var require$$32__default = /*#__PURE__*/_interopDefaultLegacy(require$$32);
|
|
88
|
-
var require$$33__default = /*#__PURE__*/_interopDefaultLegacy(require$$33);
|
|
89
|
-
var require$$34__default = /*#__PURE__*/_interopDefaultLegacy(require$$34);
|
|
90
|
-
var _taggedTemplateLiteral__default = /*#__PURE__*/_interopDefaultLegacy(_taggedTemplateLiteral);
|
|
91
|
-
|
|
92
46
|
// Spacer images to be removed
|
|
93
47
|
|
|
94
48
|
// The class we will use to mark elements we want to keep
|
|
@@ -102,7 +56,7 @@ function getAttrs(node) {
|
|
|
102
56
|
var attribs = node.attribs,
|
|
103
57
|
attributes = node.attributes;
|
|
104
58
|
if (!attribs && attributes) {
|
|
105
|
-
var attrs = _Reflect$
|
|
59
|
+
var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
|
|
106
60
|
var attr = attributes[index];
|
|
107
61
|
|
|
108
62
|
// In browser, Reflect.ownKeys includes non-numeric keys like 'length', 'item', etc.
|
|
@@ -142,7 +96,7 @@ function absolutize($, rootUrl, attr) {
|
|
|
142
96
|
var attrs = getAttrs(node);
|
|
143
97
|
var url = attrs[attr];
|
|
144
98
|
if (!url) return;
|
|
145
|
-
var absoluteUrl =
|
|
99
|
+
var absoluteUrl = URL$1.resolve(baseUrl || rootUrl, url);
|
|
146
100
|
setAttr(node, attr, absoluteUrl);
|
|
147
101
|
});
|
|
148
102
|
}
|
|
@@ -160,10 +114,10 @@ function absolutizeSet($, rootUrl, $content) {
|
|
|
160
114
|
// a candidate URL cannot start or end with a comma
|
|
161
115
|
// descriptors are separated from the URLs by unescaped whitespace
|
|
162
116
|
var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
|
|
163
|
-
parts[0] =
|
|
117
|
+
parts[0] = URL$1.resolve(rootUrl, parts[0]);
|
|
164
118
|
return parts.join(' ');
|
|
165
119
|
});
|
|
166
|
-
var absoluteUrlSet =
|
|
120
|
+
var absoluteUrlSet = _toConsumableArray(new _Set(absoluteCandidates)).join(', ');
|
|
167
121
|
setAttr(node, 'srcset', absoluteUrlSet);
|
|
168
122
|
}
|
|
169
123
|
});
|
|
@@ -185,54 +139,48 @@ var hasRequiredMercury;
|
|
|
185
139
|
function requireMercury() {
|
|
186
140
|
if (hasRequiredMercury) return mercury;
|
|
187
141
|
hasRequiredMercury = 1;
|
|
188
|
-
var _Object$keys = _Object$
|
|
189
|
-
var _Object$getOwnPropertySymbols = _Object$
|
|
190
|
-
var _Object$getOwnPropertyDescriptor = _Object$
|
|
191
|
-
var _Object$getOwnPropertyDescriptors = _Object$
|
|
192
|
-
var _Object$defineProperties = _Object$
|
|
193
|
-
var _Object$defineProperty = _Object$
|
|
194
|
-
var _defineProperty =
|
|
195
|
-
var _objectWithoutProperties = require$$
|
|
196
|
-
var _asyncToGenerator = require$$
|
|
197
|
-
var _regeneratorRuntime = require$$
|
|
198
|
-
var URL$1 =
|
|
199
|
-
var TurndownService = require$$
|
|
200
|
-
var cheerio = require$$
|
|
201
|
-
var iconv =
|
|
202
|
-
var _parseInt =
|
|
203
|
-
var _slicedToArray =
|
|
204
|
-
var _Promise = require$$
|
|
205
|
-
var request = require$$
|
|
206
|
-
var _Reflect$ownKeys = _Reflect$
|
|
207
|
-
var _toConsumableArray =
|
|
208
|
-
var _parseFloat =
|
|
209
|
-
var _Set =
|
|
210
|
-
var _Array$from = _Array$
|
|
211
|
-
var _Symbol =
|
|
212
|
-
var _Symbol$iterator = _Symbol$
|
|
213
|
-
var _Array$isArray = _Array$
|
|
214
|
-
var _Object$assign = require$$
|
|
215
|
-
var stringDirection = require$$
|
|
216
|
-
var _Number$isNaN = require$$
|
|
217
|
-
var dayjs = require$$
|
|
218
|
-
var utc = require$$
|
|
219
|
-
var timezonePlugin = require$$
|
|
220
|
-
var customParseFormat = require$$
|
|
221
|
-
var wuzzy = require$$
|
|
222
|
-
var difflib = require$$
|
|
223
|
-
function
|
|
224
|
-
|
|
225
|
-
'default': e
|
|
226
|
-
};
|
|
227
|
-
}
|
|
228
|
-
function _interopNamespace(e) {
|
|
229
|
-
if (e && e.__esModule) return e;
|
|
230
|
-
var n = _Object$create__default["default"](null);
|
|
142
|
+
var _Object$keys$1 = _Object$keys;
|
|
143
|
+
var _Object$getOwnPropertySymbols$1 = _Object$getOwnPropertySymbols;
|
|
144
|
+
var _Object$getOwnPropertyDescriptor$1 = _Object$getOwnPropertyDescriptor;
|
|
145
|
+
var _Object$getOwnPropertyDescriptors$1 = _Object$getOwnPropertyDescriptors;
|
|
146
|
+
var _Object$defineProperties$1 = _Object$defineProperties;
|
|
147
|
+
var _Object$defineProperty$1 = _Object$defineProperty;
|
|
148
|
+
var _defineProperty$1 = _defineProperty;
|
|
149
|
+
var _objectWithoutProperties = require$$7;
|
|
150
|
+
var _asyncToGenerator = require$$8;
|
|
151
|
+
var _regeneratorRuntime = require$$9;
|
|
152
|
+
var URL$1$1 = URL$1;
|
|
153
|
+
var TurndownService = require$$11;
|
|
154
|
+
var cheerio = require$$12;
|
|
155
|
+
var iconv$1 = iconv;
|
|
156
|
+
var _parseInt$1 = _parseInt;
|
|
157
|
+
var _slicedToArray$1 = _slicedToArray;
|
|
158
|
+
var _Promise = require$$16;
|
|
159
|
+
var request = require$$17;
|
|
160
|
+
var _Reflect$ownKeys$1 = _Reflect$ownKeys;
|
|
161
|
+
var _toConsumableArray$1 = _toConsumableArray;
|
|
162
|
+
var _parseFloat$1 = _parseFloat;
|
|
163
|
+
var _Set$1 = _Set;
|
|
164
|
+
var _Array$from$1 = _Array$from;
|
|
165
|
+
var _Symbol$1 = _Symbol;
|
|
166
|
+
var _Symbol$iterator$1 = _Symbol$iterator;
|
|
167
|
+
var _Array$isArray$1 = _Array$isArray;
|
|
168
|
+
var _Object$assign = require$$26;
|
|
169
|
+
var stringDirection = require$$27;
|
|
170
|
+
var _Number$isNaN = require$$28;
|
|
171
|
+
var dayjs = require$$29;
|
|
172
|
+
var utc = require$$30;
|
|
173
|
+
var timezonePlugin = require$$31;
|
|
174
|
+
var customParseFormat = require$$32;
|
|
175
|
+
var wuzzy = require$$33;
|
|
176
|
+
var difflib = require$$34;
|
|
177
|
+
function _interopNamespaceDefault(e) {
|
|
178
|
+
var n = _Object$create(null);
|
|
231
179
|
if (e) {
|
|
232
|
-
_Object$
|
|
180
|
+
_Object$keys(e).forEach(function (k) {
|
|
233
181
|
if (k !== 'default') {
|
|
234
|
-
var d = _Object$
|
|
235
|
-
_Object$
|
|
182
|
+
var d = _Object$getOwnPropertyDescriptor(e, k);
|
|
183
|
+
_Object$defineProperty(n, k, d.get ? d : {
|
|
236
184
|
enumerable: true,
|
|
237
185
|
get: function get() {
|
|
238
186
|
return e[k];
|
|
@@ -242,43 +190,9 @@ function requireMercury() {
|
|
|
242
190
|
});
|
|
243
191
|
}
|
|
244
192
|
n["default"] = e;
|
|
245
|
-
return _Object$
|
|
246
|
-
}
|
|
247
|
-
var
|
|
248
|
-
var _Object$getOwnPropertySymbols__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
|
|
249
|
-
var _Object$getOwnPropertyDescriptor__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
|
|
250
|
-
var _Object$getOwnPropertyDescriptors__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
|
|
251
|
-
var _Object$defineProperties__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
|
|
252
|
-
var _Object$defineProperty__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
|
|
253
|
-
var _defineProperty__default$1 = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
|
|
254
|
-
var _objectWithoutProperties__default = /*#__PURE__*/_interopDefaultLegacy(_objectWithoutProperties);
|
|
255
|
-
var _asyncToGenerator__default = /*#__PURE__*/_interopDefaultLegacy(_asyncToGenerator);
|
|
256
|
-
var _regeneratorRuntime__default = /*#__PURE__*/_interopDefaultLegacy(_regeneratorRuntime);
|
|
257
|
-
var URL__default$1 = /*#__PURE__*/_interopDefaultLegacy(URL$1);
|
|
258
|
-
var TurndownService__default = /*#__PURE__*/_interopDefaultLegacy(TurndownService);
|
|
259
|
-
var cheerio__namespace = /*#__PURE__*/_interopNamespace(cheerio);
|
|
260
|
-
var iconv__default$1 = /*#__PURE__*/_interopDefaultLegacy(iconv);
|
|
261
|
-
var _parseInt__default$1 = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
|
|
262
|
-
var _slicedToArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
|
|
263
|
-
var _Promise__default = /*#__PURE__*/_interopDefaultLegacy(_Promise);
|
|
264
|
-
var request__default = /*#__PURE__*/_interopDefaultLegacy(request);
|
|
265
|
-
var _Reflect$ownKeys__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
|
|
266
|
-
var _toConsumableArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
|
|
267
|
-
var _parseFloat__default$1 = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
|
|
268
|
-
var _Set__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Set);
|
|
269
|
-
var _Array$from__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
|
|
270
|
-
var _Symbol__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
|
|
271
|
-
var _Symbol$iterator__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Symbol$iterator);
|
|
272
|
-
var _Array$isArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
|
|
273
|
-
var _Object$assign__default = /*#__PURE__*/_interopDefaultLegacy(_Object$assign);
|
|
274
|
-
var stringDirection__default = /*#__PURE__*/_interopDefaultLegacy(stringDirection);
|
|
275
|
-
var _Number$isNaN__default = /*#__PURE__*/_interopDefaultLegacy(_Number$isNaN);
|
|
276
|
-
var dayjs__default = /*#__PURE__*/_interopDefaultLegacy(dayjs);
|
|
277
|
-
var utc__default = /*#__PURE__*/_interopDefaultLegacy(utc);
|
|
278
|
-
var timezonePlugin__default = /*#__PURE__*/_interopDefaultLegacy(timezonePlugin);
|
|
279
|
-
var customParseFormat__default = /*#__PURE__*/_interopDefaultLegacy(customParseFormat);
|
|
280
|
-
var wuzzy__default = /*#__PURE__*/_interopDefaultLegacy(wuzzy);
|
|
281
|
-
var difflib__default = /*#__PURE__*/_interopDefaultLegacy(difflib);
|
|
193
|
+
return _Object$freeze(n);
|
|
194
|
+
}
|
|
195
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
282
196
|
var NORMALIZE_RE = /\s{2,}(?![^<>]*<\/(pre|code|textarea)>)/g;
|
|
283
197
|
function normalizeSpaces(text) {
|
|
284
198
|
return text.replace(NORMALIZE_RE, ' ').trim();
|
|
@@ -324,7 +238,7 @@ function requireMercury() {
|
|
|
324
238
|
function pageNumFromUrl(url) {
|
|
325
239
|
var matches = url.match(PAGE_IN_HREF_RE);
|
|
326
240
|
if (!matches) return null;
|
|
327
|
-
var pageNum =
|
|
241
|
+
var pageNum = _parseInt$1(matches[6], 10);
|
|
328
242
|
|
|
329
243
|
// Return pageNum < 100, otherwise
|
|
330
244
|
// return null
|
|
@@ -360,7 +274,7 @@ function requireMercury() {
|
|
|
360
274
|
// pagination data exists in it. Useful for comparing to other links
|
|
361
275
|
// that might have pagination data within them.
|
|
362
276
|
function articleBaseUrl(url, parsed) {
|
|
363
|
-
var parsedUrl = parsed ||
|
|
277
|
+
var parsedUrl = parsed || URL$1$1.parse(url);
|
|
364
278
|
var protocol = parsedUrl.protocol,
|
|
365
279
|
host = parsedUrl.host,
|
|
366
280
|
path = parsedUrl.path;
|
|
@@ -371,7 +285,7 @@ function requireMercury() {
|
|
|
371
285
|
// Split off and save anything that looks like a file type.
|
|
372
286
|
if (segment.includes('.')) {
|
|
373
287
|
var _segment$split = segment.split('.'),
|
|
374
|
-
_segment$split2 =
|
|
288
|
+
_segment$split2 = _slicedToArray$1(_segment$split, 2),
|
|
375
289
|
possibleSegment = _segment$split2[0],
|
|
376
290
|
fileExt = _segment$split2[1];
|
|
377
291
|
if (IS_ALPHA_RE.test(fileExt)) {
|
|
@@ -420,10 +334,10 @@ function requireMercury() {
|
|
|
420
334
|
var encoding = DEFAULT_ENCODING;
|
|
421
335
|
var matches = ENCODING_RE.exec(str);
|
|
422
336
|
if (matches !== null) {
|
|
423
|
-
var _matches =
|
|
337
|
+
var _matches = _slicedToArray$1(matches, 2);
|
|
424
338
|
str = _matches[1];
|
|
425
339
|
}
|
|
426
|
-
if (
|
|
340
|
+
if (iconv$1.encodingExists(str)) {
|
|
427
341
|
encoding = str;
|
|
428
342
|
}
|
|
429
343
|
return encoding;
|
|
@@ -449,11 +363,11 @@ function requireMercury() {
|
|
|
449
363
|
// for us to attempt parsing. Defaults to 5 MB.
|
|
450
364
|
var MAX_CONTENT_LENGTH = 5242880;
|
|
451
365
|
function ownKeys$h(e, r) {
|
|
452
|
-
var t = _Object$
|
|
453
|
-
if (_Object$
|
|
454
|
-
var o = _Object$
|
|
366
|
+
var t = _Object$keys$1(e);
|
|
367
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
368
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
455
369
|
r && (o = o.filter(function (r) {
|
|
456
|
-
return _Object$
|
|
370
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
457
371
|
})), t.push.apply(t, o);
|
|
458
372
|
}
|
|
459
373
|
return t;
|
|
@@ -461,17 +375,17 @@ function requireMercury() {
|
|
|
461
375
|
function _objectSpread$h(e) {
|
|
462
376
|
for (var r = 1; r < arguments.length; r++) {
|
|
463
377
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
464
|
-
r % 2 ? ownKeys$h(Object(t),
|
|
465
|
-
|
|
466
|
-
}) : _Object$
|
|
467
|
-
_Object$
|
|
378
|
+
r % 2 ? ownKeys$h(Object(t), true).forEach(function (r) {
|
|
379
|
+
_defineProperty$1(e, r, t[r]);
|
|
380
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$h(Object(t)).forEach(function (r) {
|
|
381
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
468
382
|
});
|
|
469
383
|
}
|
|
470
384
|
return e;
|
|
471
385
|
}
|
|
472
386
|
function get(options) {
|
|
473
|
-
return new
|
|
474
|
-
|
|
387
|
+
return new _Promise(function (resolve, reject) {
|
|
388
|
+
request(options, function (err, response, body) {
|
|
475
389
|
if (err) {
|
|
476
390
|
reject(err);
|
|
477
391
|
} else {
|
|
@@ -530,7 +444,7 @@ function requireMercury() {
|
|
|
530
444
|
return _fetchResource.apply(this, arguments);
|
|
531
445
|
}
|
|
532
446
|
function _fetchResource() {
|
|
533
|
-
_fetchResource =
|
|
447
|
+
_fetchResource = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(url, parsedUrl) {
|
|
534
448
|
var headers,
|
|
535
449
|
options,
|
|
536
450
|
_yield$get,
|
|
@@ -538,11 +452,11 @@ function requireMercury() {
|
|
|
538
452
|
body,
|
|
539
453
|
_args = arguments,
|
|
540
454
|
_t;
|
|
541
|
-
return
|
|
455
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
542
456
|
while (1) switch (_context.prev = _context.next) {
|
|
543
457
|
case 0:
|
|
544
458
|
headers = _args.length > 2 && _args[2] !== undefined ? _args[2] : {};
|
|
545
|
-
parsedUrl = parsedUrl ||
|
|
459
|
+
parsedUrl = parsedUrl || URL$1$1.parse(encodeURI(url));
|
|
546
460
|
options = _objectSpread$h({
|
|
547
461
|
url: parsedUrl.href,
|
|
548
462
|
headers: _objectSpread$h(_objectSpread$h({}, REQUEST_HEADERS), headers),
|
|
@@ -801,7 +715,7 @@ function requireMercury() {
|
|
|
801
715
|
var attribs = node.attribs,
|
|
802
716
|
attributes = node.attributes;
|
|
803
717
|
if (!attribs && attributes) {
|
|
804
|
-
var attrs = _Reflect$
|
|
718
|
+
var attrs = _Reflect$ownKeys$1(attributes).reduce(function (acc, index) {
|
|
805
719
|
var attr = attributes[index];
|
|
806
720
|
|
|
807
721
|
// In browser, Reflect.ownKeys includes non-numeric keys like 'length', 'item', etc.
|
|
@@ -820,7 +734,7 @@ function requireMercury() {
|
|
|
820
734
|
return $;
|
|
821
735
|
}
|
|
822
736
|
var attrs = getAttrs(node) || {};
|
|
823
|
-
var attribString = _Reflect$
|
|
737
|
+
var attribString = _Reflect$ownKeys$1(attrs).map(function (key) {
|
|
824
738
|
return "".concat(key, "=").concat(attrs[key]);
|
|
825
739
|
}).join(' ');
|
|
826
740
|
var html;
|
|
@@ -877,8 +791,8 @@ function requireMercury() {
|
|
|
877
791
|
return $;
|
|
878
792
|
}
|
|
879
793
|
function cleanForHeight($img, $) {
|
|
880
|
-
var height =
|
|
881
|
-
var width =
|
|
794
|
+
var height = _parseInt$1($img.attr('height'), 10);
|
|
795
|
+
var width = _parseInt$1($img.attr('width'), 10) || 20;
|
|
882
796
|
|
|
883
797
|
// Remove images that explicitly have very small heights or
|
|
884
798
|
// widths, because they are most likely shims or icons,
|
|
@@ -916,10 +830,10 @@ function requireMercury() {
|
|
|
916
830
|
tags = KEEP_SELECTORS;
|
|
917
831
|
}
|
|
918
832
|
if (url) {
|
|
919
|
-
var _URL$parse =
|
|
833
|
+
var _URL$parse = URL$1$1.parse(url),
|
|
920
834
|
protocol = _URL$parse.protocol,
|
|
921
835
|
hostname = _URL$parse.hostname;
|
|
922
|
-
tags = [].concat(
|
|
836
|
+
tags = [].concat(_toConsumableArray$1(tags), ["iframe[src^=\"".concat(protocol, "//").concat(hostname, "\"]")]);
|
|
923
837
|
}
|
|
924
838
|
$(tags.join(','), article).addClass(KEEP_CLASS);
|
|
925
839
|
return $;
|
|
@@ -959,18 +873,18 @@ function requireMercury() {
|
|
|
959
873
|
while (node.attributes.length > 0) {
|
|
960
874
|
node.removeAttribute(node.attributes[0].name);
|
|
961
875
|
}
|
|
962
|
-
_Reflect$
|
|
876
|
+
_Reflect$ownKeys$1(attrs).forEach(function (key) {
|
|
963
877
|
node.setAttribute(key, attrs[key]);
|
|
964
878
|
});
|
|
965
879
|
}
|
|
966
880
|
return node;
|
|
967
881
|
}
|
|
968
882
|
function ownKeys$g(e, r) {
|
|
969
|
-
var t = _Object$
|
|
970
|
-
if (_Object$
|
|
971
|
-
var o = _Object$
|
|
883
|
+
var t = _Object$keys$1(e);
|
|
884
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
885
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
972
886
|
r && (o = o.filter(function (r) {
|
|
973
|
-
return _Object$
|
|
887
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
974
888
|
})), t.push.apply(t, o);
|
|
975
889
|
}
|
|
976
890
|
return t;
|
|
@@ -978,10 +892,10 @@ function requireMercury() {
|
|
|
978
892
|
function _objectSpread$g(e) {
|
|
979
893
|
for (var r = 1; r < arguments.length; r++) {
|
|
980
894
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
981
|
-
r % 2 ? ownKeys$g(Object(t),
|
|
982
|
-
|
|
983
|
-
}) : _Object$
|
|
984
|
-
_Object$
|
|
895
|
+
r % 2 ? ownKeys$g(Object(t), true).forEach(function (r) {
|
|
896
|
+
_defineProperty$1(e, r, t[r]);
|
|
897
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$g(Object(t)).forEach(function (r) {
|
|
898
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
985
899
|
});
|
|
986
900
|
}
|
|
987
901
|
return e;
|
|
@@ -989,9 +903,9 @@ function requireMercury() {
|
|
|
989
903
|
function removeAllButWhitelist($article, $) {
|
|
990
904
|
$article.find('*').each(function (index, node) {
|
|
991
905
|
var attrs = getAttrs(node);
|
|
992
|
-
setAttrs(node, _Reflect$
|
|
906
|
+
setAttrs(node, _Reflect$ownKeys$1(attrs).reduce(function (acc, attr) {
|
|
993
907
|
if (WHITELIST_ATTRS_RE.test(attr)) {
|
|
994
|
-
return _objectSpread$g(_objectSpread$g({}, acc), {},
|
|
908
|
+
return _objectSpread$g(_objectSpread$g({}, acc), {}, _defineProperty$1({}, attr, attrs[attr]));
|
|
995
909
|
}
|
|
996
910
|
return acc;
|
|
997
911
|
}, {}));
|
|
@@ -1021,7 +935,7 @@ function requireMercury() {
|
|
|
1021
935
|
// the node's score attribute
|
|
1022
936
|
// returns null if no score set
|
|
1023
937
|
function getScore($node) {
|
|
1024
|
-
return
|
|
938
|
+
return _parseFloat$1($node.attr('score')) || null;
|
|
1025
939
|
}
|
|
1026
940
|
function setScore($node, $, score) {
|
|
1027
941
|
$node.attr('score', score);
|
|
@@ -1203,7 +1117,7 @@ function requireMercury() {
|
|
|
1203
1117
|
return score;
|
|
1204
1118
|
}
|
|
1205
1119
|
|
|
1206
|
-
// eslint-disable-next-line import/no-cycle
|
|
1120
|
+
// eslint-disable-next-line import-x/no-cycle
|
|
1207
1121
|
function addScore($node, $, amount) {
|
|
1208
1122
|
try {
|
|
1209
1123
|
var score = getOrInitScore($node, $) + amount;
|
|
@@ -1214,7 +1128,7 @@ function requireMercury() {
|
|
|
1214
1128
|
return $node;
|
|
1215
1129
|
}
|
|
1216
1130
|
|
|
1217
|
-
// eslint-disable-next-line import/no-cycle
|
|
1131
|
+
// eslint-disable-next-line import-x/no-cycle
|
|
1218
1132
|
|
|
1219
1133
|
// Adds 1/4 of a child's score to its parent
|
|
1220
1134
|
function addToParent(node, $, score) {
|
|
@@ -1406,7 +1320,7 @@ function requireMercury() {
|
|
|
1406
1320
|
var attrs = getAttrs(node);
|
|
1407
1321
|
var url = attrs[attr];
|
|
1408
1322
|
if (!url) return;
|
|
1409
|
-
var absoluteUrl =
|
|
1323
|
+
var absoluteUrl = URL$1$1.resolve(baseUrl || rootUrl, url);
|
|
1410
1324
|
setAttr(node, attr, absoluteUrl);
|
|
1411
1325
|
});
|
|
1412
1326
|
}
|
|
@@ -1424,10 +1338,10 @@ function requireMercury() {
|
|
|
1424
1338
|
// a candidate URL cannot start or end with a comma
|
|
1425
1339
|
// descriptors are separated from the URLs by unescaped whitespace
|
|
1426
1340
|
var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
|
|
1427
|
-
parts[0] =
|
|
1341
|
+
parts[0] = URL$1$1.resolve(rootUrl, parts[0]);
|
|
1428
1342
|
return parts.join(' ');
|
|
1429
1343
|
});
|
|
1430
|
-
var absoluteUrlSet =
|
|
1344
|
+
var absoluteUrlSet = _toConsumableArray$1(new _Set$1(absoluteCandidates)).join(', ');
|
|
1431
1345
|
setAttr(node, 'srcset', absoluteUrlSet);
|
|
1432
1346
|
}
|
|
1433
1347
|
});
|
|
@@ -1448,9 +1362,9 @@ function requireMercury() {
|
|
|
1448
1362
|
return cleanText === '' ? text : cleanText;
|
|
1449
1363
|
}
|
|
1450
1364
|
function _createForOfIteratorHelper$4(r, e) {
|
|
1451
|
-
var t = "undefined" != typeof
|
|
1365
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
1452
1366
|
if (!t) {
|
|
1453
|
-
if (_Array$
|
|
1367
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$4(r)) || e) {
|
|
1454
1368
|
t && (r = t);
|
|
1455
1369
|
var _n = 0,
|
|
1456
1370
|
F = function F() {};
|
|
@@ -1458,9 +1372,9 @@ function requireMercury() {
|
|
|
1458
1372
|
s: F,
|
|
1459
1373
|
n: function n() {
|
|
1460
1374
|
return _n >= r.length ? {
|
|
1461
|
-
done:
|
|
1375
|
+
done: true
|
|
1462
1376
|
} : {
|
|
1463
|
-
done:
|
|
1377
|
+
done: false,
|
|
1464
1378
|
value: r[_n++]
|
|
1465
1379
|
};
|
|
1466
1380
|
},
|
|
@@ -1473,8 +1387,8 @@ function requireMercury() {
|
|
|
1473
1387
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
1474
1388
|
}
|
|
1475
1389
|
var o,
|
|
1476
|
-
a =
|
|
1477
|
-
u =
|
|
1390
|
+
a = true,
|
|
1391
|
+
u = false;
|
|
1478
1392
|
return {
|
|
1479
1393
|
s: function s() {
|
|
1480
1394
|
t = t.call(r);
|
|
@@ -1484,7 +1398,7 @@ function requireMercury() {
|
|
|
1484
1398
|
return a = r.done, r;
|
|
1485
1399
|
},
|
|
1486
1400
|
e: function e(r) {
|
|
1487
|
-
u =
|
|
1401
|
+
u = true, o = r;
|
|
1488
1402
|
},
|
|
1489
1403
|
f: function f() {
|
|
1490
1404
|
try {
|
|
@@ -1499,7 +1413,7 @@ function requireMercury() {
|
|
|
1499
1413
|
if (r) {
|
|
1500
1414
|
if ("string" == typeof r) return _arrayLikeToArray$4(r, a);
|
|
1501
1415
|
var t = {}.toString.call(r).slice(8, -1);
|
|
1502
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
1416
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$4(r, a) : void 0;
|
|
1503
1417
|
}
|
|
1504
1418
|
}
|
|
1505
1419
|
function _arrayLikeToArray$4(r, a) {
|
|
@@ -1515,8 +1429,6 @@ function requireMercury() {
|
|
|
1515
1429
|
var foundNames = metaNames.filter(function (name) {
|
|
1516
1430
|
return cachedNames.indexOf(name) !== -1;
|
|
1517
1431
|
});
|
|
1518
|
-
|
|
1519
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
1520
1432
|
var _iterator = _createForOfIteratorHelper$4(foundNames),
|
|
1521
1433
|
_step;
|
|
1522
1434
|
try {
|
|
@@ -1546,7 +1458,7 @@ function requireMercury() {
|
|
|
1546
1458
|
if (cleanTags) {
|
|
1547
1459
|
metaValue = stripTags(values[0], $);
|
|
1548
1460
|
} else {
|
|
1549
|
-
var _values =
|
|
1461
|
+
var _values = _slicedToArray$1(values, 1);
|
|
1550
1462
|
metaValue = _values[0];
|
|
1551
1463
|
}
|
|
1552
1464
|
return {
|
|
@@ -1580,9 +1492,9 @@ function requireMercury() {
|
|
|
1580
1492
|
return commentParent !== undefined;
|
|
1581
1493
|
}
|
|
1582
1494
|
function _createForOfIteratorHelper$3(r, e) {
|
|
1583
|
-
var t = "undefined" != typeof
|
|
1495
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
1584
1496
|
if (!t) {
|
|
1585
|
-
if (_Array$
|
|
1497
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$3(r)) || e) {
|
|
1586
1498
|
t && (r = t);
|
|
1587
1499
|
var _n = 0,
|
|
1588
1500
|
F = function F() {};
|
|
@@ -1590,9 +1502,9 @@ function requireMercury() {
|
|
|
1590
1502
|
s: F,
|
|
1591
1503
|
n: function n() {
|
|
1592
1504
|
return _n >= r.length ? {
|
|
1593
|
-
done:
|
|
1505
|
+
done: true
|
|
1594
1506
|
} : {
|
|
1595
|
-
done:
|
|
1507
|
+
done: false,
|
|
1596
1508
|
value: r[_n++]
|
|
1597
1509
|
};
|
|
1598
1510
|
},
|
|
@@ -1605,8 +1517,8 @@ function requireMercury() {
|
|
|
1605
1517
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
1606
1518
|
}
|
|
1607
1519
|
var o,
|
|
1608
|
-
a =
|
|
1609
|
-
u =
|
|
1520
|
+
a = true,
|
|
1521
|
+
u = false;
|
|
1610
1522
|
return {
|
|
1611
1523
|
s: function s() {
|
|
1612
1524
|
t = t.call(r);
|
|
@@ -1616,7 +1528,7 @@ function requireMercury() {
|
|
|
1616
1528
|
return a = r.done, r;
|
|
1617
1529
|
},
|
|
1618
1530
|
e: function e(r) {
|
|
1619
|
-
u =
|
|
1531
|
+
u = true, o = r;
|
|
1620
1532
|
},
|
|
1621
1533
|
f: function f() {
|
|
1622
1534
|
try {
|
|
@@ -1631,7 +1543,7 @@ function requireMercury() {
|
|
|
1631
1543
|
if (r) {
|
|
1632
1544
|
if ("string" == typeof r) return _arrayLikeToArray$3(r, a);
|
|
1633
1545
|
var t = {}.toString.call(r).slice(8, -1);
|
|
1634
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
1546
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$3(r, a) : void 0;
|
|
1635
1547
|
}
|
|
1636
1548
|
}
|
|
1637
1549
|
function _arrayLikeToArray$3(r, a) {
|
|
@@ -1658,7 +1570,6 @@ function requireMercury() {
|
|
|
1658
1570
|
function extractFromSelectors($, selectors) {
|
|
1659
1571
|
var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
|
|
1660
1572
|
var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
|
|
1661
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
1662
1573
|
var _iterator = _createForOfIteratorHelper$3(selectors),
|
|
1663
1574
|
_step;
|
|
1664
1575
|
try {
|
|
@@ -1725,7 +1636,7 @@ function requireMercury() {
|
|
|
1725
1636
|
};
|
|
1726
1637
|
$('img').each(function (_, img) {
|
|
1727
1638
|
var attrs = getAttrs(img);
|
|
1728
|
-
_Reflect$
|
|
1639
|
+
_Reflect$ownKeys$1(attrs).forEach(function (attr) {
|
|
1729
1640
|
var value = attrs[attr];
|
|
1730
1641
|
if (attr !== 'srcset' && IS_LINK.test(value) && IS_SRCSET.test(value)) {
|
|
1731
1642
|
$(img).attr('srcset', value);
|
|
@@ -1765,9 +1676,9 @@ function requireMercury() {
|
|
|
1765
1676
|
create: function create(url, preparedResponse, parsedUrl) {
|
|
1766
1677
|
var _arguments = arguments,
|
|
1767
1678
|
_this = this;
|
|
1768
|
-
return
|
|
1679
|
+
return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
1769
1680
|
var headers, result, validResponse;
|
|
1770
|
-
return
|
|
1681
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
1771
1682
|
while (1) switch (_context.prev = _context.next) {
|
|
1772
1683
|
case 0:
|
|
1773
1684
|
headers = _arguments.length > 3 && _arguments[3] !== undefined ? _arguments[3] : {};
|
|
@@ -1847,7 +1758,7 @@ function requireMercury() {
|
|
|
1847
1758
|
}
|
|
1848
1759
|
var encoding = getEncoding(contentType);
|
|
1849
1760
|
// UTF-8 is handled natively by Node.js, skip iconv-lite
|
|
1850
|
-
var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') :
|
|
1761
|
+
var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') : iconv$1.decode(content, encoding);
|
|
1851
1762
|
var $ = cheerio__namespace.load(decodedContent);
|
|
1852
1763
|
// after first cheerio.load, check to see if encoding matches
|
|
1853
1764
|
var contentTypeSelector = isBrowser ? 'meta[http-equiv=content-type]' : 'meta[http-equiv=content-type i]';
|
|
@@ -1856,7 +1767,7 @@ function requireMercury() {
|
|
|
1856
1767
|
|
|
1857
1768
|
// if encodings in the header/body dont match, use the one in the body
|
|
1858
1769
|
if (metaContentType && properEncoding !== encoding) {
|
|
1859
|
-
decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') :
|
|
1770
|
+
decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') : iconv$1.decode(content, properEncoding);
|
|
1860
1771
|
$ = cheerio__namespace.load(decodedContent);
|
|
1861
1772
|
}
|
|
1862
1773
|
return $;
|
|
@@ -1865,8 +1776,8 @@ function requireMercury() {
|
|
|
1865
1776
|
function range() {
|
|
1866
1777
|
var start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
|
|
1867
1778
|
var end = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
|
|
1868
|
-
return /*#__PURE__*/
|
|
1869
|
-
return
|
|
1779
|
+
return /*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
1780
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
1870
1781
|
while (1) switch (_context.prev = _context.next) {
|
|
1871
1782
|
case 0:
|
|
1872
1783
|
if (!(start <= end)) {
|
|
@@ -1899,7 +1810,7 @@ function requireMercury() {
|
|
|
1899
1810
|
}, {});
|
|
1900
1811
|
};
|
|
1901
1812
|
// Collects every domain an extractor answers to — its primary `domain`
// followed by any `supportedDomains` — and hands the extractor plus that
// list to `merge`, returning whatever `merge` returns.
// `merge` is defined earlier in this file; its body is not repeated here.
function mergeSupportedDomains(extractor) {
  var domains = [extractor.domain];
  if (extractor.supportedDomains) {
    // Copy the extra domains through the array helper before appending them.
    domains = domains.concat(_toConsumableArray$1(extractor.supportedDomains));
  }
  return merge(extractor, domains);
}
|
|
1904
1815
|
var apiExtractors = {};
|
|
1905
1816
|
function addExtractor(extractor) {
|
|
@@ -1909,7 +1820,7 @@ function requireMercury() {
|
|
|
1909
1820
|
message: 'Unable to add custom extractor. Invalid parameters.'
|
|
1910
1821
|
};
|
|
1911
1822
|
}
|
|
1912
|
-
_Object$
|
|
1823
|
+
_Object$assign(apiExtractors, mergeSupportedDomains(extractor));
|
|
1913
1824
|
return apiExtractors;
|
|
1914
1825
|
}
|
|
1915
1826
|
var BalloonJuiceComExtractor = {
|
|
@@ -2610,7 +2521,7 @@ function requireMercury() {
|
|
|
2610
2521
|
var $parent = $node.parents('figure');
|
|
2611
2522
|
if (ytRe.test(thumb)) {
|
|
2612
2523
|
var _thumb$match = thumb.match(ytRe),
|
|
2613
|
-
_thumb$match2 =
|
|
2524
|
+
_thumb$match2 = _slicedToArray$1(_thumb$match, 2);
|
|
2614
2525
|
_thumb$match2[0];
|
|
2615
2526
|
var youtubeId = _thumb$match2[1]; // eslint-disable-line
|
|
2616
2527
|
$node.attr('src', "https://www.youtube.com/embed/".concat(youtubeId));
|
|
@@ -2633,7 +2544,7 @@ function requireMercury() {
|
|
|
2633
2544
|
// Remove any smaller images that did not get caught by the generic image
|
|
2634
2545
|
// cleaner (author photo 48px, leading sentence images 79px, etc.).
|
|
2635
2546
|
img: function img($node) {
|
|
2636
|
-
var width =
|
|
2547
|
+
var width = _parseInt$1($node.attr('width'), 10);
|
|
2637
2548
|
if (width < 100) $node.remove();
|
|
2638
2549
|
}
|
|
2639
2550
|
},
|
|
@@ -3501,7 +3412,7 @@ function requireMercury() {
|
|
|
3501
3412
|
// before it's consumable content? E.g., unusual lazy loaded images
|
|
3502
3413
|
transforms: {
|
|
3503
3414
|
'.pane-node-body': function paneNodeBody($node, $) {
|
|
3504
|
-
var _WwwMsnbcComExtractor =
|
|
3415
|
+
var _WwwMsnbcComExtractor = _slicedToArray$1(WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
|
|
3505
3416
|
selector = _WwwMsnbcComExtractor[0],
|
|
3506
3417
|
attr = _WwwMsnbcComExtractor[1];
|
|
3507
3418
|
var src = $(selector).attr(attr);
|
|
@@ -5573,7 +5484,7 @@ function requireMercury() {
|
|
|
5573
5484
|
'img[data-original]': function imgDataOriginal($node) {
|
|
5574
5485
|
var dataOriginal = $node.attr('data-original');
|
|
5575
5486
|
var src = $node.attr('src');
|
|
5576
|
-
var url =
|
|
5487
|
+
var url = URL$1$1.resolve(src, dataOriginal);
|
|
5577
5488
|
$node.attr('src', url);
|
|
5578
5489
|
}
|
|
5579
5490
|
},
|
|
@@ -5865,9 +5776,6 @@ function requireMercury() {
|
|
|
5865
5776
|
clean: []
|
|
5866
5777
|
}
|
|
5867
5778
|
};
|
|
5868
|
-
|
|
5869
|
-
/* eslint-disable no-nested-ternary */
|
|
5870
|
-
/* eslint-disable no-unused-expressions */
|
|
5871
5779
|
var WwwAbendblattDeExtractor = {
|
|
5872
5780
|
domain: 'www.abendblatt.de',
|
|
5873
5781
|
title: {
|
|
@@ -6466,11 +6374,11 @@ function requireMercury() {
|
|
|
6466
6374
|
}
|
|
6467
6375
|
};
|
|
6468
6376
|
function ownKeys$f(e, r) {
|
|
6469
|
-
var t = _Object$
|
|
6470
|
-
if (_Object$
|
|
6471
|
-
var o = _Object$
|
|
6377
|
+
var t = _Object$keys$1(e);
|
|
6378
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6379
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6472
6380
|
r && (o = o.filter(function (r) {
|
|
6473
|
-
return _Object$
|
|
6381
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6474
6382
|
})), t.push.apply(t, o);
|
|
6475
6383
|
}
|
|
6476
6384
|
return t;
|
|
@@ -6478,10 +6386,10 @@ function requireMercury() {
|
|
|
6478
6386
|
function _objectSpread$f(e) {
|
|
6479
6387
|
for (var r = 1; r < arguments.length; r++) {
|
|
6480
6388
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6481
|
-
r % 2 ? ownKeys$f(Object(t),
|
|
6482
|
-
|
|
6483
|
-
}) : _Object$
|
|
6484
|
-
_Object$
|
|
6389
|
+
r % 2 ? ownKeys$f(Object(t), true).forEach(function (r) {
|
|
6390
|
+
_defineProperty$1(e, r, t[r]);
|
|
6391
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$f(Object(t)).forEach(function (r) {
|
|
6392
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6485
6393
|
});
|
|
6486
6394
|
}
|
|
6487
6395
|
return e;
|
|
@@ -6490,11 +6398,11 @@ function requireMercury() {
|
|
|
6490
6398
|
domain: 'sport.se.pl'
|
|
6491
6399
|
});
|
|
6492
6400
|
function ownKeys$e(e, r) {
|
|
6493
|
-
var t = _Object$
|
|
6494
|
-
if (_Object$
|
|
6495
|
-
var o = _Object$
|
|
6401
|
+
var t = _Object$keys$1(e);
|
|
6402
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6403
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6496
6404
|
r && (o = o.filter(function (r) {
|
|
6497
|
-
return _Object$
|
|
6405
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6498
6406
|
})), t.push.apply(t, o);
|
|
6499
6407
|
}
|
|
6500
6408
|
return t;
|
|
@@ -6502,10 +6410,10 @@ function requireMercury() {
|
|
|
6502
6410
|
function _objectSpread$e(e) {
|
|
6503
6411
|
for (var r = 1; r < arguments.length; r++) {
|
|
6504
6412
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6505
|
-
r % 2 ? ownKeys$e(Object(t),
|
|
6506
|
-
|
|
6507
|
-
}) : _Object$
|
|
6508
|
-
_Object$
|
|
6413
|
+
r % 2 ? ownKeys$e(Object(t), true).forEach(function (r) {
|
|
6414
|
+
_defineProperty$1(e, r, t[r]);
|
|
6415
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$e(Object(t)).forEach(function (r) {
|
|
6416
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6509
6417
|
});
|
|
6510
6418
|
}
|
|
6511
6419
|
return e;
|
|
@@ -6541,11 +6449,11 @@ function requireMercury() {
|
|
|
6541
6449
|
}
|
|
6542
6450
|
};
|
|
6543
6451
|
function ownKeys$d(e, r) {
|
|
6544
|
-
var t = _Object$
|
|
6545
|
-
if (_Object$
|
|
6546
|
-
var o = _Object$
|
|
6452
|
+
var t = _Object$keys$1(e);
|
|
6453
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6454
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6547
6455
|
r && (o = o.filter(function (r) {
|
|
6548
|
-
return _Object$
|
|
6456
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6549
6457
|
})), t.push.apply(t, o);
|
|
6550
6458
|
}
|
|
6551
6459
|
return t;
|
|
@@ -6553,10 +6461,10 @@ function requireMercury() {
|
|
|
6553
6461
|
function _objectSpread$d(e) {
|
|
6554
6462
|
for (var r = 1; r < arguments.length; r++) {
|
|
6555
6463
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6556
|
-
r % 2 ? ownKeys$d(Object(t),
|
|
6557
|
-
|
|
6558
|
-
}) : _Object$
|
|
6559
|
-
_Object$
|
|
6464
|
+
r % 2 ? ownKeys$d(Object(t), true).forEach(function (r) {
|
|
6465
|
+
_defineProperty$1(e, r, t[r]);
|
|
6466
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$d(Object(t)).forEach(function (r) {
|
|
6467
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6560
6468
|
});
|
|
6561
6469
|
}
|
|
6562
6470
|
return e;
|
|
@@ -6565,11 +6473,11 @@ function requireMercury() {
|
|
|
6565
6473
|
domain: 'szczecin.se.pl'
|
|
6566
6474
|
});
|
|
6567
6475
|
function ownKeys$c(e, r) {
|
|
6568
|
-
var t = _Object$
|
|
6569
|
-
if (_Object$
|
|
6570
|
-
var o = _Object$
|
|
6476
|
+
var t = _Object$keys$1(e);
|
|
6477
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6478
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6571
6479
|
r && (o = o.filter(function (r) {
|
|
6572
|
-
return _Object$
|
|
6480
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6573
6481
|
})), t.push.apply(t, o);
|
|
6574
6482
|
}
|
|
6575
6483
|
return t;
|
|
@@ -6577,10 +6485,10 @@ function requireMercury() {
|
|
|
6577
6485
|
function _objectSpread$c(e) {
|
|
6578
6486
|
for (var r = 1; r < arguments.length; r++) {
|
|
6579
6487
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6580
|
-
r % 2 ? ownKeys$c(Object(t),
|
|
6581
|
-
|
|
6582
|
-
}) : _Object$
|
|
6583
|
-
_Object$
|
|
6488
|
+
r % 2 ? ownKeys$c(Object(t), true).forEach(function (r) {
|
|
6489
|
+
_defineProperty$1(e, r, t[r]);
|
|
6490
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$c(Object(t)).forEach(function (r) {
|
|
6491
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6584
6492
|
});
|
|
6585
6493
|
}
|
|
6586
6494
|
return e;
|
|
@@ -6589,11 +6497,11 @@ function requireMercury() {
|
|
|
6589
6497
|
domain: 'superbiz.se.pl'
|
|
6590
6498
|
});
|
|
6591
6499
|
function ownKeys$b(e, r) {
|
|
6592
|
-
var t = _Object$
|
|
6593
|
-
if (_Object$
|
|
6594
|
-
var o = _Object$
|
|
6500
|
+
var t = _Object$keys$1(e);
|
|
6501
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6502
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6595
6503
|
r && (o = o.filter(function (r) {
|
|
6596
|
-
return _Object$
|
|
6504
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6597
6505
|
})), t.push.apply(t, o);
|
|
6598
6506
|
}
|
|
6599
6507
|
return t;
|
|
@@ -6601,10 +6509,10 @@ function requireMercury() {
|
|
|
6601
6509
|
function _objectSpread$b(e) {
|
|
6602
6510
|
for (var r = 1; r < arguments.length; r++) {
|
|
6603
6511
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6604
|
-
r % 2 ? ownKeys$b(Object(t),
|
|
6605
|
-
|
|
6606
|
-
}) : _Object$
|
|
6607
|
-
_Object$
|
|
6512
|
+
r % 2 ? ownKeys$b(Object(t), true).forEach(function (r) {
|
|
6513
|
+
_defineProperty$1(e, r, t[r]);
|
|
6514
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$b(Object(t)).forEach(function (r) {
|
|
6515
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6608
6516
|
});
|
|
6609
6517
|
}
|
|
6610
6518
|
return e;
|
|
@@ -6634,11 +6542,11 @@ function requireMercury() {
|
|
|
6634
6542
|
}
|
|
6635
6543
|
};
|
|
6636
6544
|
function ownKeys$a(e, r) {
|
|
6637
|
-
var t = _Object$
|
|
6638
|
-
if (_Object$
|
|
6639
|
-
var o = _Object$
|
|
6545
|
+
var t = _Object$keys$1(e);
|
|
6546
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6547
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6640
6548
|
r && (o = o.filter(function (r) {
|
|
6641
|
-
return _Object$
|
|
6549
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6642
6550
|
})), t.push.apply(t, o);
|
|
6643
6551
|
}
|
|
6644
6552
|
return t;
|
|
@@ -6646,10 +6554,10 @@ function requireMercury() {
|
|
|
6646
6554
|
function _objectSpread$a(e) {
|
|
6647
6555
|
for (var r = 1; r < arguments.length; r++) {
|
|
6648
6556
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6649
|
-
r % 2 ? ownKeys$a(Object(t),
|
|
6650
|
-
|
|
6651
|
-
}) : _Object$
|
|
6652
|
-
_Object$
|
|
6557
|
+
r % 2 ? ownKeys$a(Object(t), true).forEach(function (r) {
|
|
6558
|
+
_defineProperty$1(e, r, t[r]);
|
|
6559
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$a(Object(t)).forEach(function (r) {
|
|
6560
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6653
6561
|
});
|
|
6654
6562
|
}
|
|
6655
6563
|
return e;
|
|
@@ -6658,11 +6566,11 @@ function requireMercury() {
|
|
|
6658
6566
|
domain: 'lodz.se.pl'
|
|
6659
6567
|
});
|
|
6660
6568
|
function ownKeys$9(e, r) {
|
|
6661
|
-
var t = _Object$
|
|
6662
|
-
if (_Object$
|
|
6663
|
-
var o = _Object$
|
|
6569
|
+
var t = _Object$keys$1(e);
|
|
6570
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6571
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6664
6572
|
r && (o = o.filter(function (r) {
|
|
6665
|
-
return _Object$
|
|
6573
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6666
6574
|
})), t.push.apply(t, o);
|
|
6667
6575
|
}
|
|
6668
6576
|
return t;
|
|
@@ -6670,10 +6578,10 @@ function requireMercury() {
|
|
|
6670
6578
|
function _objectSpread$9(e) {
|
|
6671
6579
|
for (var r = 1; r < arguments.length; r++) {
|
|
6672
6580
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6673
|
-
r % 2 ? ownKeys$9(Object(t),
|
|
6674
|
-
|
|
6675
|
-
}) : _Object$
|
|
6676
|
-
_Object$
|
|
6581
|
+
r % 2 ? ownKeys$9(Object(t), true).forEach(function (r) {
|
|
6582
|
+
_defineProperty$1(e, r, t[r]);
|
|
6583
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$9(Object(t)).forEach(function (r) {
|
|
6584
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6677
6585
|
});
|
|
6678
6586
|
}
|
|
6679
6587
|
return e;
|
|
@@ -6682,11 +6590,11 @@ function requireMercury() {
|
|
|
6682
6590
|
domain: 'wroclaw.se.pl'
|
|
6683
6591
|
});
|
|
6684
6592
|
function ownKeys$8(e, r) {
|
|
6685
|
-
var t = _Object$
|
|
6686
|
-
if (_Object$
|
|
6687
|
-
var o = _Object$
|
|
6593
|
+
var t = _Object$keys$1(e);
|
|
6594
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6595
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6688
6596
|
r && (o = o.filter(function (r) {
|
|
6689
|
-
return _Object$
|
|
6597
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6690
6598
|
})), t.push.apply(t, o);
|
|
6691
6599
|
}
|
|
6692
6600
|
return t;
|
|
@@ -6694,10 +6602,10 @@ function requireMercury() {
|
|
|
6694
6602
|
function _objectSpread$8(e) {
|
|
6695
6603
|
for (var r = 1; r < arguments.length; r++) {
|
|
6696
6604
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6697
|
-
r % 2 ? ownKeys$8(Object(t),
|
|
6698
|
-
|
|
6699
|
-
}) : _Object$
|
|
6700
|
-
_Object$
|
|
6605
|
+
r % 2 ? ownKeys$8(Object(t), true).forEach(function (r) {
|
|
6606
|
+
_defineProperty$1(e, r, t[r]);
|
|
6607
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$8(Object(t)).forEach(function (r) {
|
|
6608
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6701
6609
|
});
|
|
6702
6610
|
}
|
|
6703
6611
|
return e;
|
|
@@ -6706,11 +6614,11 @@ function requireMercury() {
|
|
|
6706
6614
|
domain: 'lublin.se.pl'
|
|
6707
6615
|
});
|
|
6708
6616
|
function ownKeys$7(e, r) {
|
|
6709
|
-
var t = _Object$
|
|
6710
|
-
if (_Object$
|
|
6711
|
-
var o = _Object$
|
|
6617
|
+
var t = _Object$keys$1(e);
|
|
6618
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6619
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6712
6620
|
r && (o = o.filter(function (r) {
|
|
6713
|
-
return _Object$
|
|
6621
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6714
6622
|
})), t.push.apply(t, o);
|
|
6715
6623
|
}
|
|
6716
6624
|
return t;
|
|
@@ -6718,10 +6626,10 @@ function requireMercury() {
|
|
|
6718
6626
|
function _objectSpread$7(e) {
|
|
6719
6627
|
for (var r = 1; r < arguments.length; r++) {
|
|
6720
6628
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6721
|
-
r % 2 ? ownKeys$7(Object(t),
|
|
6722
|
-
|
|
6723
|
-
}) : _Object$
|
|
6724
|
-
_Object$
|
|
6629
|
+
r % 2 ? ownKeys$7(Object(t), true).forEach(function (r) {
|
|
6630
|
+
_defineProperty$1(e, r, t[r]);
|
|
6631
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$7(Object(t)).forEach(function (r) {
|
|
6632
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6725
6633
|
});
|
|
6726
6634
|
}
|
|
6727
6635
|
return e;
|
|
@@ -6971,7 +6879,7 @@ function requireMercury() {
|
|
|
6971
6879
|
img: function img($node) {
|
|
6972
6880
|
var srcset = $node.attr('srcset');
|
|
6973
6881
|
var _split = (srcset || '').split(','),
|
|
6974
|
-
_split2 =
|
|
6882
|
+
_split2 = _slicedToArray$1(_split, 1),
|
|
6975
6883
|
src = _split2[0];
|
|
6976
6884
|
if (src) {
|
|
6977
6885
|
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
@@ -7010,7 +6918,7 @@ function requireMercury() {
|
|
|
7010
6918
|
img: function img($node) {
|
|
7011
6919
|
var srcset = $node.attr('srcset');
|
|
7012
6920
|
var _split = (srcset || '').split(','),
|
|
7013
|
-
_split2 =
|
|
6921
|
+
_split2 = _slicedToArray$1(_split, 1),
|
|
7014
6922
|
src = _split2[0];
|
|
7015
6923
|
if (src) {
|
|
7016
6924
|
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
@@ -7525,11 +7433,11 @@ function requireMercury() {
|
|
|
7525
7433
|
}
|
|
7526
7434
|
};
|
|
7527
7435
|
function ownKeys$6(e, r) {
|
|
7528
|
-
var t = _Object$
|
|
7529
|
-
if (_Object$
|
|
7530
|
-
var o = _Object$
|
|
7436
|
+
var t = _Object$keys$1(e);
|
|
7437
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
7438
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
7531
7439
|
r && (o = o.filter(function (r) {
|
|
7532
|
-
return _Object$
|
|
7440
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
7533
7441
|
})), t.push.apply(t, o);
|
|
7534
7442
|
}
|
|
7535
7443
|
return t;
|
|
@@ -7537,10 +7445,10 @@ function requireMercury() {
|
|
|
7537
7445
|
function _objectSpread$6(e) {
|
|
7538
7446
|
for (var r = 1; r < arguments.length; r++) {
|
|
7539
7447
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
7540
|
-
r % 2 ? ownKeys$6(Object(t),
|
|
7541
|
-
|
|
7542
|
-
}) : _Object$
|
|
7543
|
-
_Object$
|
|
7448
|
+
r % 2 ? ownKeys$6(Object(t), true).forEach(function (r) {
|
|
7449
|
+
_defineProperty$1(e, r, t[r]);
|
|
7450
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$6(Object(t)).forEach(function (r) {
|
|
7451
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
7544
7452
|
});
|
|
7545
7453
|
}
|
|
7546
7454
|
return e;
|
|
@@ -7572,213 +7480,421 @@ function requireMercury() {
|
|
|
7572
7480
|
clean: []
|
|
7573
7481
|
}
|
|
7574
7482
|
};
|
|
7575
|
-
var
|
|
7483
|
+
// Custom extractor definition for actualidad.rt.com.
// Each metadata field is a single [selector, attribute] pair that reads the
// `value` attribute of a named <meta> tag; the article body comes from
// `.ArticleView-text`.
var ActualidadRtComExtractor = (function () {
  // Wraps a selector into a one-entry list of [selector, 'value'] pairs.
  function metaValue(selector) {
    return [[selector, 'value']];
  }

  return {
    domain: 'actualidad.rt.com',
    title: { selectors: metaValue('meta[name="og:title"]') },
    author: { selectors: metaValue('meta[name="article:author"]') },
    date_published: {
      selectors: metaValue('meta[name="mediator_published_time"]')
    },
    dek: { selectors: metaValue('meta[name="og:description"]') },
    lead_image_url: { selectors: metaValue('meta[name="og:image"]') },
    content: {
      selectors: ['.ArticleView-text'],
      transforms: {},
      // RT wraps each <img> in a <picture> whose <source> elements carry a
      // base64 placeholder srcset; browsers honor that over the real
      // <img src>, so the <source> elements are removed and the <img>
      // (which holds the real URL) is what renders.
      clean: ['.ReadMore-root', 'source']
    }
  };
})();
|
|
7509
|
+
// Custom extractor definition for www.tweaktown.com.
// Title, publication date, dek and lead image are [selector, attribute]
// pairs reading the `value` attribute of <meta> tags; the author is taken
// from a rel="author" link and the body from `#article-body`.
var WwwTweaktownComExtractor = (function () {
  // Attribute name shared by every <meta>-based selector pair below.
  var VALUE = 'value';

  var extractor = { domain: 'www.tweaktown.com' };

  extractor.title = {
    selectors: [['meta[name="og:title"]', VALUE]]
  };
  extractor.author = {
    selectors: ['.info-bar-div2 a[rel="author"]']
  };
  extractor.date_published = {
    selectors: [['meta[name="article:published_time"]', VALUE]]
  };
  extractor.dek = {
    selectors: [['meta[name="og:description"]', VALUE]]
  };
  extractor.lead_image_url = {
    selectors: [['meta[name="og:image"]', VALUE]]
  };
  // No per-node transforms and nothing extra to strip for this site.
  extractor.content = {
    selectors: ['#article-body'],
    transforms: {},
    clean: []
  };

  return extractor;
})();
|
|
7532
|
+
// Custom extractor definition for www.frandroid.com.
// Metadata is read from the `value` attribute of <meta> tags; the body is
// `section.article-content`, with h2/h3 headings tagged and a list of
// non-article widgets removed.
var WwwFrandroidComExtractor = (function () {
  // Class written onto headings by the transforms below.
  // NOTE(review): presumably this class protects the node from later
  // cleaning steps — confirm against the cleaner implementation.
  var KEEP_CLASS = 'mercury-parser-keep';

  // Selectors removed from the extracted content.
  var unwanted = [
    '.index-menu-wrapper',
    '.is-gastric-kingfisher',
    '.newsletter-form',
    '.share',
    '.article-footer',
    '.js-feed-posts',
    '.optidigital-adslot',
    '[id^="optidigital-adslot"]'
  ];

  return {
    domain: 'www.frandroid.com',
    title: {
      selectors: [['meta[name="og:title"]', 'value']]
    },
    author: {
      selectors: [['meta[name="parsely-author"]', 'value']]
    },
    date_published: {
      selectors: [['meta[name="article:published_time"]', 'value']]
    },
    dek: {
      selectors: [['meta[name="og:description"]', 'value']]
    },
    lead_image_url: {
      selectors: [['meta[name="og:image"]', 'value']]
    },
    content: {
      selectors: ['section.article-content'],
      transforms: {
        // Both heading transforms set the class attribute and return the
        // result of that attr() call.
        h2: function h2(node) {
          var tagged = node.attr('class', KEEP_CLASS);
          return tagged;
        },
        h3: function h3(node) {
          var tagged = node.attr('class', KEEP_CLASS);
          return tagged;
        }
      },
      clean: unwanted
    }
  };
})();
|
|
7562
|
+
// Custom extractor definition for www.motorsport.com.
// Title, publication date and lead image come from the `value` attribute of
// <meta> tags; author and dek are read from page elements; the body is
// `.ms-article-content`.
var WwwMotorsportComExtractor = (function () {
  // Selectors stripped from the extracted article body.
  // NOTE(review): 'msnt-survey-promo' has no leading '.', so it matches an
  // element by tag name rather than by class — confirm that is intended.
  var removed = [
    'msnt-survey-promo',
    '.article-fullwidth-gallery_item ~ .article-fullwidth-gallery_item',
    '.ms-inarticle-widgets',
    '.relatedContent',
    '.ms-apb',
    '.ms-ap-native',
    '.outstream_partner'
  ];

  var body = {
    selectors: ['.ms-article-content'],
    transforms: {
      // Sets the heading's class attribute and returns the attr() result.
      h2: function h2(node) {
        var tagged = node.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },
    clean: removed
  };

  return {
    domain: 'www.motorsport.com',
    title: {
      selectors: [['meta[name="og:title"]', 'value']]
    },
    author: {
      selectors: ['.msnt-author-toolbar a[href*="/info/about-us/"]']
    },
    date_published: {
      selectors: [['meta[name="datePublished"]', 'value']]
    },
    dek: {
      selectors: ['h2.text-article-description']
    },
    lead_image_url: {
      selectors: [['meta[name="og:image"]', 'value']]
    },
    content: body
  };
})();
|
|
7589
|
+
// Custom extractor definition for substack.com.
// All metadata fields read the `value` attribute of <meta> tags; the body
// is `.available-content`, with image wrappers simplified and
// subscription/sharing widgets removed.
var SubstackComExtractor = (function () {
  // Builds the { selectors: [[selector, 'value']] } shape used by every
  // metadata field of this extractor.
  function fromMeta(selector) {
    return { selectors: [[selector, 'value']] };
  }

  // Selectors removed from the extracted content.
  var widgets = [
    '.subscribe-widget',
    '.subscription-widget-wrap',
    '.subscription-widget-wrap-editor',
    '.button-wrapper',
    '.poll-embed',
    '.share-dialog'
  ];

  return {
    domain: 'substack.com',
    title: fromMeta('meta[name="og:title"]'),
    author: fromMeta('meta[name="author"]'),
    date_published: fromMeta('meta[name="article:published_time"]'),
    dek: fromMeta('meta[name="og:description"]'),
    lead_image_url: fromMeta('meta[name="og:image"]'),
    content: {
      selectors: ['.available-content'],
      transforms: {
        // A string value names the tag the matched node is mapped to.
        'div.captioned-image-container': 'figure',
        // Replaces the link wrapper with whatever `img` descendants it has.
        'div.image-link': function divImageLink($node) {
          var $images = $node.find('img');
          $node.replaceWith($images);
        }
      },
      clean: widgets
    }
  };
})();
|
|
7617
|
+
// Custom extractor definition for www.dw.com.
// Title, publication date, dek and lead image are [selector, attribute]
// pairs reading the `value` attribute of <meta> tags; the author comes from
// `.author-name .author-link` and the body from the rich-text container.
var WwwDwComExtractor = {
  domain: 'www.dw.com',
  title: {
    selectors: [['meta[name="og:title"]', 'value']]
  },
  author: {
    selectors: ['.author-name .author-link']
  },
  date_published: {
    selectors: [['meta[name="date"]', 'value']]
  },
  dek: {
    selectors: [['meta[name="og:description"]', 'value']]
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['[data-tracking-name="rich-text"]'],
    transforms: {
      // DW inline images are responsive: the real template lives in data-url
      // with a literal ${formatId} size token that JS would replace, leaving a
      // broken src in the raw HTML. Resolve it to a standard content size.
      //
      // The template is taken from data-url, falling back to src, and src is
      // only rewritten when the token is present. Every occurrence of the
      // token is substituted: a plain-string String.prototype.replace would
      // stop after the first match and leave a repeated token unresolved.
      img: function img(node) {
        var token = '${formatId}';
        // Format id substituted for the token.
        var formatId = '6';
        var template = node.attr('data-url') || node.attr('src') || '';
        if (template.includes(token)) {
          node.attr('src', template.split(token).join(formatId));
        }
      }
    },
    // Embedded tweets are non-functional fallback markup without JS.
    clean: ['blockquote.tweet.embed']
  }
};
|
|
7651
|
+
// Custom extractor definition for www.animenewsnetwork.com.
// Title, dek and lead image read the `value` attribute of <meta> tags, the
// publication date reads the `datetime` attribute of `small time`, and the
// body is `.KonaBody`.
var WwwAnimenewsnetworkComExtractor = {
  domain: 'www.animenewsnetwork.com',
  title: {
    selectors: [['meta[name="og:title"]', 'value']]
  },
  // No author selector is configured for this site.
  author: null,
  date_published: {
    selectors: [['small time', 'datetime']]
  },
  dek: {
    selectors: [['meta[name="description"]', 'value']]
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['.KonaBody'],
    transforms: {
      // Images are lazy-loaded: real URL in data-src, a spacer.gif in src.
      // Promote data-src so the images survive cleaning and render.
      //
      // Images without a data-src are left untouched. Otherwise the value
      // is made absolute before being written to src:
      //   - "//host/path"  (protocol-relative) gets an "https:" scheme;
      //   - "/path"        (site-relative) is prefixed with the site origin;
      //   - anything else is used as-is.
      // The "//" case must be tested first, because such a value also
      // starts with "/" and would otherwise be glued onto the site origin.
      img: function img(node) {
        var dataSrc = node.attr('data-src');
        if (!dataSrc) {
          return;
        }
        var src = dataSrc;
        if (dataSrc.startsWith('//')) {
          src = "https:".concat(dataSrc);
        } else if (dataSrc.startsWith('/')) {
          src = "https://www.animenewsnetwork.com".concat(dataSrc);
        }
        node.attr('src', src);
        node.removeAttr('data-src');
      }
    },
    // .intro duplicates the dek; instaread-player is an audio widget.
    clean: ['.intro', 'instaread-player']
  }
};
|
|
7684
|
+
var CustomExtractors = /*#__PURE__*/_Object$freeze({
|
|
7576
7685
|
__proto__: null,
|
|
7686
|
+
AbcnewsGoComExtractor: AbcnewsGoComExtractor,
|
|
7687
|
+
ActualidadRtComExtractor: ActualidadRtComExtractor,
|
|
7688
|
+
ApartmentTherapyExtractor: ApartmentTherapyExtractor,
|
|
7689
|
+
ArstechnicaComExtractor: ArstechnicaComExtractor,
|
|
7577
7690
|
BalloonJuiceComExtractor: BalloonJuiceComExtractor,
|
|
7691
|
+
BialystokSePlExtractor: BialystokSePlExtractor,
|
|
7692
|
+
BiorxivOrgExtractor: BiorxivOrgExtractor,
|
|
7693
|
+
BlisterreviewComExtractor: BlisterreviewComExtractor,
|
|
7578
7694
|
BloggerExtractor: BloggerExtractor,
|
|
7579
|
-
|
|
7580
|
-
|
|
7581
|
-
|
|
7582
|
-
|
|
7583
|
-
TheAtlanticExtractor: TheAtlanticExtractor,
|
|
7584
|
-
NewYorkerExtractor: NewYorkerExtractor,
|
|
7585
|
-
WiredExtractor: WiredExtractor,
|
|
7586
|
-
MSNExtractor: MSNExtractor,
|
|
7587
|
-
YahooExtractor: YahooExtractor,
|
|
7695
|
+
BookwalkerJpExtractor: BookwalkerJpExtractor,
|
|
7696
|
+
BroadwayWorldExtractor: BroadwayWorldExtractor,
|
|
7697
|
+
BskyAppExtractor: BskyAppExtractor,
|
|
7698
|
+
BuzzapJpExtractor: BuzzapJpExtractor,
|
|
7588
7699
|
BuzzfeedExtractor: BuzzfeedExtractor,
|
|
7589
|
-
|
|
7590
|
-
|
|
7591
|
-
|
|
7700
|
+
ChicagoyimbyComExtractor: ChicagoyimbyComExtractor,
|
|
7701
|
+
ClinicaltrialsGovExtractor: ClinicaltrialsGovExtractor,
|
|
7702
|
+
DeadlineComExtractor: DeadlineComExtractor,
|
|
7592
7703
|
DeadspinExtractor: DeadspinExtractor,
|
|
7593
|
-
|
|
7594
|
-
|
|
7595
|
-
|
|
7596
|
-
WwwTmzComExtractor: WwwTmzComExtractor,
|
|
7597
|
-
WwwWashingtonpostComExtractor: WwwWashingtonpostComExtractor,
|
|
7598
|
-
WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
|
|
7599
|
-
NewrepublicComExtractor: NewrepublicComExtractor,
|
|
7600
|
-
MoneyCnnComExtractor: MoneyCnnComExtractor,
|
|
7601
|
-
WwwCnnComExtractor: WwwCnnComExtractor,
|
|
7602
|
-
WwwAolComExtractor: WwwAolComExtractor,
|
|
7603
|
-
WwwYoutubeComExtractor: WwwYoutubeComExtractor,
|
|
7604
|
-
WwwTheguardianComExtractor: WwwTheguardianComExtractor,
|
|
7605
|
-
WwwSbnationComExtractor: WwwSbnationComExtractor,
|
|
7606
|
-
WwwBloombergComExtractor: WwwBloombergComExtractor,
|
|
7607
|
-
WwwBustleComExtractor: WwwBustleComExtractor,
|
|
7608
|
-
WwwNprOrgExtractor: WwwNprOrgExtractor,
|
|
7609
|
-
WwwRecodeNetExtractor: WwwRecodeNetExtractor,
|
|
7610
|
-
QzComExtractor: QzComExtractor,
|
|
7611
|
-
WwwDmagazineComExtractor: WwwDmagazineComExtractor,
|
|
7612
|
-
WwwReutersComExtractor: WwwReutersComExtractor,
|
|
7613
|
-
MashableComExtractor: MashableComExtractor,
|
|
7614
|
-
WwwChicagotribuneComExtractor: WwwChicagotribuneComExtractor,
|
|
7615
|
-
WwwVoxComExtractor: WwwVoxComExtractor,
|
|
7616
|
-
NewsNationalgeographicComExtractor: NewsNationalgeographicComExtractor,
|
|
7617
|
-
WwwNationalgeographicComExtractor: WwwNationalgeographicComExtractor,
|
|
7618
|
-
WwwLatimesComExtractor: WwwLatimesComExtractor,
|
|
7619
|
-
PagesixComExtractor: PagesixComExtractor,
|
|
7620
|
-
ThefederalistpapersOrgExtractor: ThefederalistpapersOrgExtractor,
|
|
7621
|
-
WwwCbssportsComExtractor: WwwCbssportsComExtractor,
|
|
7622
|
-
WwwMsnbcComExtractor: WwwMsnbcComExtractor,
|
|
7623
|
-
WwwThepoliticalinsiderComExtractor: WwwThepoliticalinsiderComExtractor,
|
|
7624
|
-
WwwMentalflossComExtractor: WwwMentalflossComExtractor,
|
|
7625
|
-
AbcnewsGoComExtractor: AbcnewsGoComExtractor,
|
|
7626
|
-
WwwNydailynewsComExtractor: WwwNydailynewsComExtractor,
|
|
7627
|
-
WwwCnbcComExtractor: WwwCnbcComExtractor,
|
|
7628
|
-
WwwPopsugarComExtractor: WwwPopsugarComExtractor,
|
|
7629
|
-
ObserverComExtractor: ObserverComExtractor,
|
|
7630
|
-
PeopleComExtractor: PeopleComExtractor,
|
|
7631
|
-
WwwUsmagazineComExtractor: WwwUsmagazineComExtractor,
|
|
7632
|
-
WwwRollingstoneComExtractor: WwwRollingstoneComExtractor,
|
|
7633
|
-
twofortysevensportsComExtractor: twofortysevensportsComExtractor,
|
|
7634
|
-
UproxxComExtractor: UproxxComExtractor,
|
|
7635
|
-
WwwEonlineComExtractor: WwwEonlineComExtractor,
|
|
7636
|
-
WwwMiamiheraldComExtractor: WwwMiamiheraldComExtractor,
|
|
7637
|
-
WwwRefinery29ComExtractor: WwwRefinery29ComExtractor,
|
|
7638
|
-
WwwMacrumorsComExtractor: WwwMacrumorsComExtractor,
|
|
7639
|
-
WwwAndroidcentralComExtractor: WwwAndroidcentralComExtractor,
|
|
7640
|
-
WwwSiComExtractor: WwwSiComExtractor,
|
|
7641
|
-
WwwRawstoryComExtractor: WwwRawstoryComExtractor,
|
|
7642
|
-
WwwCnetComExtractor: WwwCnetComExtractor,
|
|
7643
|
-
WwwTodayComExtractor: WwwTodayComExtractor,
|
|
7644
|
-
WwwAlComExtractor: WwwAlComExtractor,
|
|
7645
|
-
WwwThepennyhoarderComExtractor: WwwThepennyhoarderComExtractor,
|
|
7646
|
-
WwwWesternjournalismComExtractor: WwwWesternjournalismComExtractor,
|
|
7647
|
-
WwwAmericanowComExtractor: WwwAmericanowComExtractor,
|
|
7648
|
-
ScienceflyComExtractor: ScienceflyComExtractor,
|
|
7649
|
-
HellogigglesComExtractor: HellogigglesComExtractor,
|
|
7650
|
-
ThoughtcatalogComExtractor: ThoughtcatalogComExtractor,
|
|
7651
|
-
WwwInquisitrComExtractor: WwwInquisitrComExtractor,
|
|
7652
|
-
WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
|
|
7704
|
+
EconomictimesIndiatimesComExtractor: EconomictimesIndiatimesComExtractor,
|
|
7705
|
+
EpaperZeitDeExtractor: EpaperZeitDeExtractor,
|
|
7706
|
+
FactorioComExtractor: FactorioComExtractor,
|
|
7653
7707
|
FortuneComExtractor: FortuneComExtractor,
|
|
7654
|
-
WwwLinkedinComExtractor: WwwLinkedinComExtractor,
|
|
7655
|
-
ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
|
|
7656
|
-
WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
|
|
7657
|
-
WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
|
|
7658
7708
|
ForwardComExtractor: ForwardComExtractor,
|
|
7659
|
-
|
|
7709
|
+
GeniusComExtractor: GeniusComExtractor,
|
|
7710
|
+
GetnewsJpExtractor: GetnewsJpExtractor,
|
|
7711
|
+
GithubComExtractor: GithubComExtractor,
|
|
7712
|
+
GonintendoComExtractor: GonintendoComExtractor,
|
|
7660
7713
|
GothamistComExtractor: GothamistComExtractor,
|
|
7661
|
-
|
|
7662
|
-
|
|
7714
|
+
GrEuronewsComExtractor: GrEuronewsComExtractor,
|
|
7715
|
+
HellogigglesComExtractor: HellogigglesComExtractor,
|
|
7663
7716
|
IciRadioCanadaCaExtractor: IciRadioCanadaCaExtractor,
|
|
7664
|
-
|
|
7665
|
-
|
|
7666
|
-
|
|
7717
|
+
JapanCnetComExtractor: JapanCnetComExtractor,
|
|
7718
|
+
JapanZdnetComExtractor: JapanZdnetComExtractor,
|
|
7719
|
+
JvndbJvnJpExtractor: JvndbJvnJpExtractor,
|
|
7720
|
+
LittleThingsExtractor: LittleThingsExtractor,
|
|
7721
|
+
LodzSePlExtractor: LodzSePlExtractor,
|
|
7722
|
+
LublinSePlExtractor: LublinSePlExtractor,
|
|
7723
|
+
MSNExtractor: MSNExtractor,
|
|
7724
|
+
MaTtiasBeExtractor: MaTtiasBeExtractor,
|
|
7725
|
+
MashableComExtractor: MashableComExtractor,
|
|
7726
|
+
MediumExtractor: MediumExtractor,
|
|
7727
|
+
MobilesyrupComExtractor: MobilesyrupComExtractor,
|
|
7728
|
+
MoneyCnnComExtractor: MoneyCnnComExtractor,
|
|
7729
|
+
NYMagExtractor: NYMagExtractor,
|
|
7730
|
+
NYTimesExtractor: NYTimesExtractor,
|
|
7731
|
+
NewYorkerExtractor: NewYorkerExtractor,
|
|
7732
|
+
NewrepublicComExtractor: NewrepublicComExtractor,
|
|
7667
7733
|
NewsMynaviJpExtractor: NewsMynaviJpExtractor,
|
|
7668
|
-
|
|
7669
|
-
|
|
7670
|
-
|
|
7734
|
+
NewsNationalgeographicComExtractor: NewsNationalgeographicComExtractor,
|
|
7735
|
+
NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
|
|
7736
|
+
Nineto5googleComExtractor: Nineto5googleComExtractor,
|
|
7737
|
+
Nineto5linuxComExtractor: Nineto5linuxComExtractor,
|
|
7738
|
+
Nineto5macComExtractor: Nineto5macComExtractor,
|
|
7739
|
+
ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
|
|
7740
|
+
ObserverComExtractor: ObserverComExtractor,
|
|
7741
|
+
OrfAtExtractor: OrfAtExtractor,
|
|
7671
7742
|
OtrsComExtractor: OtrsComExtractor,
|
|
7672
|
-
|
|
7673
|
-
|
|
7674
|
-
|
|
7675
|
-
WwwSanwaCoJpExtractor: WwwSanwaCoJpExtractor,
|
|
7676
|
-
WwwElecomCoJpExtractor: WwwElecomCoJpExtractor,
|
|
7677
|
-
ScanNetsecurityNeJpExtractor: ScanNetsecurityNeJpExtractor,
|
|
7678
|
-
JvndbJvnJpExtractor: JvndbJvnJpExtractor,
|
|
7679
|
-
GeniusComExtractor: GeniusComExtractor,
|
|
7680
|
-
WwwJnsaOrgExtractor: WwwJnsaOrgExtractor,
|
|
7743
|
+
PagesixComExtractor: PagesixComExtractor,
|
|
7744
|
+
PastebinComExtractor: PastebinComExtractor,
|
|
7745
|
+
PeopleComExtractor: PeopleComExtractor,
|
|
7681
7746
|
PhpspotOrgExtractor: PhpspotOrgExtractor,
|
|
7682
|
-
|
|
7683
|
-
|
|
7684
|
-
|
|
7685
|
-
|
|
7686
|
-
|
|
7687
|
-
|
|
7688
|
-
|
|
7689
|
-
|
|
7690
|
-
DeadlineComExtractor: DeadlineComExtractor,
|
|
7691
|
-
WwwGizmodoJpExtractor: WwwGizmodoJpExtractor,
|
|
7692
|
-
GetnewsJpExtractor: GetnewsJpExtractor,
|
|
7693
|
-
WwwLifehackerJpExtractor: WwwLifehackerJpExtractor,
|
|
7747
|
+
PitchforkComExtractor: PitchforkComExtractor,
|
|
7748
|
+
PoliticoExtractor: PoliticoExtractor,
|
|
7749
|
+
PolitykaSePlExtractor: PolitykaSePlExtractor,
|
|
7750
|
+
PolskisamorzadSePlExtractor: PolskisamorzadSePlExtractor,
|
|
7751
|
+
PortalobronnySePlExtractor: PortalobronnySePlExtractor,
|
|
7752
|
+
QzComExtractor: QzComExtractor,
|
|
7753
|
+
ScanNetsecurityNeJpExtractor: ScanNetsecurityNeJpExtractor,
|
|
7754
|
+
ScienceflyComExtractor: ScienceflyComExtractor,
|
|
7694
7755
|
SectIijAdJpExtractor: SectIijAdJpExtractor,
|
|
7695
|
-
|
|
7696
|
-
|
|
7697
|
-
|
|
7756
|
+
SgNewsYahooComExtractor: SgNewsYahooComExtractor,
|
|
7757
|
+
SpektrumExtractor: SpektrumExtractor,
|
|
7758
|
+
SportSePlExtractor: SportSePlExtractor,
|
|
7759
|
+
SubstackComExtractor: SubstackComExtractor,
|
|
7760
|
+
SuperbizSePlExtractor: SuperbizSePlExtractor,
|
|
7761
|
+
SuperserialeSePlExtractor: SuperserialeSePlExtractor,
|
|
7762
|
+
SzczecinSePlExtractor: SzczecinSePlExtractor,
|
|
7763
|
+
TakagihiromitsuJpExtractor: TakagihiromitsuJpExtractor,
|
|
7764
|
+
TarnkappeInfoExtractor: TarnkappeInfoExtractor,
|
|
7765
|
+
TechcrunchComExtractor: TechcrunchComExtractor,
|
|
7698
7766
|
TechlogIijAdJpExtractor: TechlogIijAdJpExtractor,
|
|
7699
|
-
|
|
7700
|
-
|
|
7701
|
-
|
|
7702
|
-
|
|
7703
|
-
WwwPhoronixComExtractor: WwwPhoronixComExtractor,
|
|
7704
|
-
PitchforkComExtractor: PitchforkComExtractor,
|
|
7705
|
-
BiorxivOrgExtractor: BiorxivOrgExtractor,
|
|
7706
|
-
EpaperZeitDeExtractor: EpaperZeitDeExtractor,
|
|
7707
|
-
WwwLadbibleComExtractor: WwwLadbibleComExtractor,
|
|
7767
|
+
TerminaltroveComExtractor: TerminaltroveComExtractor,
|
|
7768
|
+
TheAtlanticExtractor: TheAtlanticExtractor,
|
|
7769
|
+
ThefederalistpapersOrgExtractor: ThefederalistpapersOrgExtractor,
|
|
7770
|
+
ThoughtcatalogComExtractor: ThoughtcatalogComExtractor,
|
|
7708
7771
|
TimesofindiaIndiatimesComExtractor: TimesofindiaIndiatimesComExtractor,
|
|
7709
|
-
|
|
7710
|
-
|
|
7711
|
-
|
|
7712
|
-
|
|
7713
|
-
|
|
7714
|
-
|
|
7715
|
-
|
|
7716
|
-
|
|
7717
|
-
|
|
7718
|
-
|
|
7772
|
+
TldrTechExtractor: TldrTechExtractor,
|
|
7773
|
+
TwitterExtractor: TwitterExtractor,
|
|
7774
|
+
UproxxComExtractor: UproxxComExtractor,
|
|
7775
|
+
WccftechComExtractor: WccftechComExtractor,
|
|
7776
|
+
WeeklyAsciiJpExtractor: WeeklyAsciiJpExtractor,
|
|
7777
|
+
WikiaExtractor: WikiaExtractor,
|
|
7778
|
+
WikipediaExtractor: WikipediaExtractor,
|
|
7779
|
+
WiredExtractor: WiredExtractor,
|
|
7780
|
+
WiredJpExtractor: WiredJpExtractor,
|
|
7781
|
+
WroclawSePlExtractor: WroclawSePlExtractor,
|
|
7719
7782
|
Www1pezeshkComExtractor: Www1pezeshkComExtractor,
|
|
7783
|
+
WwwAbendblattDeExtractor: WwwAbendblattDeExtractor,
|
|
7784
|
+
WwwAlComExtractor: WwwAlComExtractor,
|
|
7785
|
+
WwwAmericanowComExtractor: WwwAmericanowComExtractor,
|
|
7720
7786
|
WwwAndroidauthorityComExtractor: WwwAndroidauthorityComExtractor,
|
|
7721
|
-
|
|
7722
|
-
|
|
7723
|
-
|
|
7724
|
-
|
|
7787
|
+
WwwAndroidcentralComExtractor: WwwAndroidcentralComExtractor,
|
|
7788
|
+
WwwAnimenewsnetworkComExtractor: WwwAnimenewsnetworkComExtractor,
|
|
7789
|
+
WwwAolComExtractor: WwwAolComExtractor,
|
|
7790
|
+
WwwAsahiComExtractor: WwwAsahiComExtractor,
|
|
7791
|
+
WwwBlickDeExtractor: WwwBlickDeExtractor,
|
|
7792
|
+
WwwBloombergComExtractor: WwwBloombergComExtractor,
|
|
7793
|
+
WwwBustleComExtractor: WwwBustleComExtractor,
|
|
7794
|
+
WwwCbcCaExtractor: WwwCbcCaExtractor,
|
|
7795
|
+
WwwCbssportsComExtractor: WwwCbssportsComExtractor,
|
|
7725
7796
|
WwwChannelnewsasiaComExtractor: WwwChannelnewsasiaComExtractor,
|
|
7726
|
-
|
|
7797
|
+
WwwChicagotribuneComExtractor: WwwChicagotribuneComExtractor,
|
|
7798
|
+
WwwCnbcComExtractor: WwwCnbcComExtractor,
|
|
7799
|
+
WwwCnetComExtractor: WwwCnetComExtractor,
|
|
7800
|
+
WwwCnnComExtractor: WwwCnnComExtractor,
|
|
7801
|
+
WwwDmagazineComExtractor: WwwDmagazineComExtractor,
|
|
7802
|
+
WwwDwComExtractor: WwwDwComExtractor,
|
|
7803
|
+
WwwElecomCoJpExtractor: WwwElecomCoJpExtractor,
|
|
7804
|
+
WwwEngadgetComExtractor: WwwEngadgetComExtractor,
|
|
7805
|
+
WwwEonlineComExtractor: WwwEonlineComExtractor,
|
|
7806
|
+
WwwEuronewsComExtractor: WwwEuronewsComExtractor,
|
|
7807
|
+
WwwFastcompanyComExtractor: WwwFastcompanyComExtractor,
|
|
7808
|
+
WwwFlatpanelshdComExtractor: WwwFlatpanelshdComExtractor,
|
|
7809
|
+
WwwFoolComExtractor: WwwFoolComExtractor,
|
|
7810
|
+
WwwFortinetComExtractor: WwwFortinetComExtractor,
|
|
7811
|
+
WwwFrandroidComExtractor: WwwFrandroidComExtractor,
|
|
7812
|
+
WwwFuturaSciencesComExtractor: WwwFuturaSciencesComExtractor,
|
|
7813
|
+
WwwGizmodoJpExtractor: WwwGizmodoJpExtractor,
|
|
7814
|
+
WwwGrueneDeExtractor: WwwGrueneDeExtractor,
|
|
7815
|
+
WwwHardwarezoneComSgExtractor: WwwHardwarezoneComSgExtractor,
|
|
7727
7816
|
WwwHeiseDeExtractor: WwwHeiseDeExtractor,
|
|
7728
|
-
|
|
7729
|
-
|
|
7730
|
-
|
|
7731
|
-
|
|
7732
|
-
|
|
7733
|
-
|
|
7734
|
-
|
|
7735
|
-
|
|
7736
|
-
|
|
7737
|
-
|
|
7738
|
-
|
|
7739
|
-
LodzSePlExtractor: LodzSePlExtractor,
|
|
7740
|
-
WroclawSePlExtractor: WroclawSePlExtractor,
|
|
7741
|
-
LublinSePlExtractor: LublinSePlExtractor,
|
|
7742
|
-
BialystokSePlExtractor: BialystokSePlExtractor,
|
|
7817
|
+
WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
|
|
7818
|
+
WwwIlfattoquotidianoItExtractor: WwwIlfattoquotidianoItExtractor,
|
|
7819
|
+
WwwInfoqComExtractor: WwwInfoqComExtractor,
|
|
7820
|
+
WwwInquisitrComExtractor: WwwInquisitrComExtractor,
|
|
7821
|
+
WwwInvestmentexecutiveComExtractor: WwwInvestmentexecutiveComExtractor,
|
|
7822
|
+
WwwIpaGoJpExtractor: WwwIpaGoJpExtractor,
|
|
7823
|
+
WwwItmediaCoJpExtractor: WwwItmediaCoJpExtractor,
|
|
7824
|
+
WwwJalopnikComExtractor: WwwJalopnikComExtractor,
|
|
7825
|
+
WwwJnsaOrgExtractor: WwwJnsaOrgExtractor,
|
|
7826
|
+
WwwLadbibleComExtractor: WwwLadbibleComExtractor,
|
|
7827
|
+
WwwLatimesComExtractor: WwwLatimesComExtractor,
|
|
7743
7828
|
WwwLebensmittelwarnungDeExtractor: WwwLebensmittelwarnungDeExtractor,
|
|
7829
|
+
WwwLemondeFrExtractor: WwwLemondeFrExtractor,
|
|
7830
|
+
WwwLifehackerJpExtractor: WwwLifehackerJpExtractor,
|
|
7831
|
+
WwwLinkedinComExtractor: WwwLinkedinComExtractor,
|
|
7832
|
+
WwwMacrumorsComExtractor: WwwMacrumorsComExtractor,
|
|
7833
|
+
WwwMentalflossComExtractor: WwwMentalflossComExtractor,
|
|
7834
|
+
WwwMiamiheraldComExtractor: WwwMiamiheraldComExtractor,
|
|
7835
|
+
WwwMoongiftJpExtractor: WwwMoongiftJpExtractor,
|
|
7836
|
+
WwwMotorsportComExtractor: WwwMotorsportComExtractor,
|
|
7837
|
+
WwwMsnbcComExtractor: WwwMsnbcComExtractor,
|
|
7838
|
+
WwwNationalgeographicComExtractor: WwwNationalgeographicComExtractor,
|
|
7839
|
+
WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
|
|
7840
|
+
WwwNdtvComExtractor: WwwNdtvComExtractor,
|
|
7841
|
+
WwwNotebookcheckNetExtractor: WwwNotebookcheckNetExtractor,
|
|
7842
|
+
WwwNprOrgExtractor: WwwNprOrgExtractor,
|
|
7843
|
+
WwwNtvDeExtractor: WwwNtvDeExtractor,
|
|
7844
|
+
WwwNumeramaComExtractor: WwwNumeramaComExtractor,
|
|
7845
|
+
WwwNydailynewsComExtractor: WwwNydailynewsComExtractor,
|
|
7846
|
+
WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
|
|
7847
|
+
WwwOreillyCoJpExtractor: WwwOreillyCoJpExtractor,
|
|
7848
|
+
WwwOssnewsJpExtractor: WwwOssnewsJpExtractor,
|
|
7849
|
+
WwwPhoronixComExtractor: WwwPhoronixComExtractor,
|
|
7850
|
+
WwwPolygonComExtractor: WwwPolygonComExtractor,
|
|
7851
|
+
WwwPopsugarComExtractor: WwwPopsugarComExtractor,
|
|
7852
|
+
WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
|
|
7853
|
+
WwwPublickey1JpExtractor: WwwPublickey1JpExtractor,
|
|
7744
7854
|
WwwQbitaiComExtractor: WwwQbitaiComExtractor,
|
|
7745
|
-
|
|
7746
|
-
|
|
7855
|
+
WwwQdailyComExtractor: WwwQdailyComExtractor,
|
|
7856
|
+
WwwRawstoryComExtractor: WwwRawstoryComExtractor,
|
|
7857
|
+
WwwRbbtodayComExtractor: WwwRbbtodayComExtractor,
|
|
7858
|
+
WwwRecodeNetExtractor: WwwRecodeNetExtractor,
|
|
7859
|
+
WwwRedditComExtractor: WwwRedditComExtractor,
|
|
7860
|
+
WwwRefinery29ComExtractor: WwwRefinery29ComExtractor,
|
|
7861
|
+
WwwReutersComExtractor: WwwReutersComExtractor,
|
|
7862
|
+
WwwRollingstoneComExtractor: WwwRollingstoneComExtractor,
|
|
7863
|
+
WwwSanwaCoJpExtractor: WwwSanwaCoJpExtractor,
|
|
7864
|
+
WwwSbnationComExtractor: WwwSbnationComExtractor,
|
|
7865
|
+
WwwSePlExtractor: WwwSePlExtractor,
|
|
7866
|
+
WwwSiComExtractor: WwwSiComExtractor,
|
|
7867
|
+
WwwSlateComExtractor: WwwSlateComExtractor,
|
|
7868
|
+
WwwSpiegelDeExtractor: WwwSpiegelDeExtractor,
|
|
7747
7869
|
WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
|
|
7748
|
-
Nineto5googleComExtractor: Nineto5googleComExtractor,
|
|
7749
|
-
WwwEngadgetComExtractor: WwwEngadgetComExtractor,
|
|
7750
|
-
TarnkappeInfoExtractor: TarnkappeInfoExtractor,
|
|
7751
|
-
WwwVortezNetExtractor: WwwVortezNetExtractor,
|
|
7752
|
-
WwwPolygonComExtractor: WwwPolygonComExtractor,
|
|
7753
|
-
WwwThevergeComExtractor: WwwThevergeComExtractor,
|
|
7754
7870
|
WwwTechpowerupComExtractor: WwwTechpowerupComExtractor,
|
|
7755
|
-
WwwFlatpanelshdComExtractor: WwwFlatpanelshdComExtractor,
|
|
7756
|
-
Nineto5macComExtractor: Nineto5macComExtractor,
|
|
7757
|
-
WwwNotebookcheckNetExtractor: WwwNotebookcheckNetExtractor,
|
|
7758
|
-
WwwFuturaSciencesComExtractor: WwwFuturaSciencesComExtractor,
|
|
7759
|
-
SgNewsYahooComExtractor: SgNewsYahooComExtractor,
|
|
7760
|
-
GonintendoComExtractor: GonintendoComExtractor,
|
|
7761
|
-
OrfAtExtractor: OrfAtExtractor,
|
|
7762
|
-
WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
|
|
7763
|
-
WwwNumeramaComExtractor: WwwNumeramaComExtractor,
|
|
7764
|
-
TerminaltroveComExtractor: TerminaltroveComExtractor,
|
|
7765
|
-
NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
|
|
7766
7871
|
WwwThedriveComExtractor: WwwThedriveComExtractor,
|
|
7767
|
-
|
|
7768
|
-
|
|
7769
|
-
|
|
7872
|
+
WwwTheguardianComExtractor: WwwTheguardianComExtractor,
|
|
7873
|
+
WwwThepennyhoarderComExtractor: WwwThepennyhoarderComExtractor,
|
|
7874
|
+
WwwThepoliticalinsiderComExtractor: WwwThepoliticalinsiderComExtractor,
|
|
7875
|
+
WwwThevergeComExtractor: WwwThevergeComExtractor,
|
|
7876
|
+
WwwTmzComExtractor: WwwTmzComExtractor,
|
|
7877
|
+
WwwTodayComExtractor: WwwTodayComExtractor,
|
|
7770
7878
|
WwwTransfermarktDeExtractor: WwwTransfermarktDeExtractor,
|
|
7771
|
-
|
|
7772
|
-
|
|
7773
|
-
|
|
7774
|
-
|
|
7879
|
+
WwwTweaktownComExtractor: WwwTweaktownComExtractor,
|
|
7880
|
+
WwwUsmagazineComExtractor: WwwUsmagazineComExtractor,
|
|
7881
|
+
WwwVersantsComExtractor: WwwVersantsComExtractor,
|
|
7882
|
+
WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
|
|
7883
|
+
WwwVortezNetExtractor: WwwVortezNetExtractor,
|
|
7884
|
+
WwwVoxComExtractor: WwwVoxComExtractor,
|
|
7885
|
+
WwwWashingtonpostComExtractor: WwwWashingtonpostComExtractor,
|
|
7886
|
+
WwwWesternjournalismComExtractor: WwwWesternjournalismComExtractor,
|
|
7887
|
+
WwwYomiuriCoJpExtractor: WwwYomiuriCoJpExtractor,
|
|
7888
|
+
WwwYoutubeComExtractor: WwwYoutubeComExtractor,
|
|
7889
|
+
YahooExtractor: YahooExtractor,
|
|
7890
|
+
twofortysevensportsComExtractor: twofortysevensportsComExtractor
|
|
7775
7891
|
});
|
|
7776
7892
|
function ownKeys$5(e, r) {
|
|
7777
|
-
var t = _Object$
|
|
7778
|
-
if (_Object$
|
|
7779
|
-
var o = _Object$
|
|
7893
|
+
var t = _Object$keys$1(e);
|
|
7894
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
7895
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
7780
7896
|
r && (o = o.filter(function (r) {
|
|
7781
|
-
return _Object$
|
|
7897
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
7782
7898
|
})), t.push.apply(t, o);
|
|
7783
7899
|
}
|
|
7784
7900
|
return t;
|
|
@@ -7786,15 +7902,15 @@ function requireMercury() {
|
|
|
7786
7902
|
function _objectSpread$5(e) {
|
|
7787
7903
|
for (var r = 1; r < arguments.length; r++) {
|
|
7788
7904
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
7789
|
-
r % 2 ? ownKeys$5(Object(t),
|
|
7790
|
-
|
|
7791
|
-
}) : _Object$
|
|
7792
|
-
_Object$
|
|
7905
|
+
r % 2 ? ownKeys$5(Object(t), true).forEach(function (r) {
|
|
7906
|
+
_defineProperty$1(e, r, t[r]);
|
|
7907
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$5(Object(t)).forEach(function (r) {
|
|
7908
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
7793
7909
|
});
|
|
7794
7910
|
}
|
|
7795
7911
|
return e;
|
|
7796
7912
|
}
|
|
7797
|
-
var Extractors = _Object$
|
|
7913
|
+
var Extractors = _Object$keys$1(CustomExtractors).reduce(function (acc, key) {
|
|
7798
7914
|
var extractor = CustomExtractors[key];
|
|
7799
7915
|
return _objectSpread$5(_objectSpread$5({}, acc), mergeSupportedDomains(extractor));
|
|
7800
7916
|
}, {});
|
|
@@ -7862,9 +7978,9 @@ function requireMercury() {
|
|
|
7862
7978
|
if (TEXT_LINK_RE.test(dekText)) return null;
|
|
7863
7979
|
return normalizeSpaces(dekText.trim());
|
|
7864
7980
|
}
|
|
7865
|
-
|
|
7866
|
-
|
|
7867
|
-
|
|
7981
|
+
dayjs.extend(utc);
|
|
7982
|
+
dayjs.extend(timezonePlugin);
|
|
7983
|
+
dayjs.extend(customParseFormat);
|
|
7868
7984
|
var TIMEZONE_ABBR_RE = /\b(EST|EDT|CST|CDT|MST|MDT|PST|PDT|ET|CT|MT|PT|GMT|UTC)\b/gi;
|
|
7869
7985
|
// Check if string contains timezone offset info (e.g., +0000, GMT+0000, Z)
|
|
7870
7986
|
var HAS_TIMEZONE_RE = /([+-]\d{2}:?\d{2}|Z|\bGMT[+-]\d+|\bUTC\b)/i;
|
|
@@ -7884,53 +8000,53 @@ function requireMercury() {
|
|
|
7884
8000
|
}
|
|
7885
8001
|
function createDate(dateString, timezone, format) {
|
|
7886
8002
|
if (TIME_WITH_OFFSET_RE.test(dateString)) {
|
|
7887
|
-
return
|
|
8003
|
+
return dayjs(new Date(dateString));
|
|
7888
8004
|
}
|
|
7889
8005
|
if (TIME_AGO_STRING.test(dateString)) {
|
|
7890
8006
|
var fragments = TIME_AGO_STRING.exec(dateString);
|
|
7891
|
-
return
|
|
8007
|
+
return dayjs().subtract(fragments[1], fragments[2]);
|
|
7892
8008
|
}
|
|
7893
8009
|
if (TIME_NOW_STRING.test(dateString)) {
|
|
7894
|
-
return
|
|
8010
|
+
return dayjs();
|
|
7895
8011
|
}
|
|
7896
8012
|
var stringHasTimezone = hasTimezoneInfo(dateString);
|
|
7897
8013
|
var cleanedDateString = stripTimezoneAbbr(dateString);
|
|
7898
8014
|
if (stringHasTimezone) {
|
|
7899
8015
|
var _nativeDate = new Date(dateString);
|
|
7900
|
-
if (!_Number$
|
|
7901
|
-
return
|
|
8016
|
+
if (!_Number$isNaN(_nativeDate.getTime())) {
|
|
8017
|
+
return dayjs(_nativeDate);
|
|
7902
8018
|
}
|
|
7903
8019
|
}
|
|
7904
8020
|
if (timezone && !stringHasTimezone) {
|
|
7905
8021
|
if (format) {
|
|
7906
8022
|
var cleanedFormat = stripTimezoneFromFormat(format);
|
|
7907
8023
|
try {
|
|
7908
|
-
var _parsed =
|
|
8024
|
+
var _parsed = dayjs.tz(cleanedDateString, cleanedFormat, timezone);
|
|
7909
8025
|
if (_parsed.isValid()) return _parsed;
|
|
7910
8026
|
} catch (_unused) {
|
|
7911
8027
|
// Fall through
|
|
7912
8028
|
}
|
|
7913
8029
|
}
|
|
7914
8030
|
var _nativeDate2 = new Date(cleanedDateString);
|
|
7915
|
-
if (!_Number$
|
|
7916
|
-
return
|
|
8031
|
+
if (!_Number$isNaN(_nativeDate2.getTime())) {
|
|
8032
|
+
return dayjs(_nativeDate2).tz(timezone, true);
|
|
7917
8033
|
}
|
|
7918
|
-
var parsed =
|
|
8034
|
+
var parsed = dayjs(cleanedDateString);
|
|
7919
8035
|
if (parsed.isValid()) {
|
|
7920
8036
|
return parsed.tz(timezone, true);
|
|
7921
8037
|
}
|
|
7922
|
-
return
|
|
8038
|
+
return dayjs(null);
|
|
7923
8039
|
}
|
|
7924
8040
|
if (format) {
|
|
7925
8041
|
var _cleanedFormat = stripTimezoneFromFormat(format);
|
|
7926
|
-
var _parsed2 =
|
|
8042
|
+
var _parsed2 = dayjs(cleanedDateString, _cleanedFormat);
|
|
7927
8043
|
if (_parsed2.isValid()) return _parsed2;
|
|
7928
8044
|
}
|
|
7929
8045
|
var nativeDate = new Date(cleanedDateString);
|
|
7930
|
-
if (!_Number$
|
|
7931
|
-
return
|
|
8046
|
+
if (!_Number$isNaN(nativeDate.getTime())) {
|
|
8047
|
+
return dayjs(nativeDate);
|
|
7932
8048
|
}
|
|
7933
|
-
return
|
|
8049
|
+
return dayjs(cleanedDateString);
|
|
7934
8050
|
}
|
|
7935
8051
|
|
|
7936
8052
|
// Take a date published string, and hopefully return a date out of
|
|
@@ -7941,10 +8057,10 @@ function requireMercury() {
|
|
|
7941
8057
|
format = _ref.format;
|
|
7942
8058
|
// If string is in milliseconds or seconds, convert to int and return
|
|
7943
8059
|
if (MS_DATE_STRING.test(dateString)) {
|
|
7944
|
-
return new Date(
|
|
8060
|
+
return new Date(_parseInt$1(dateString, 10)).toISOString();
|
|
7945
8061
|
}
|
|
7946
8062
|
if (SEC_DATE_STRING.test(dateString)) {
|
|
7947
|
-
return new Date(
|
|
8063
|
+
return new Date(_parseInt$1(dateString, 10) * 1000).toISOString();
|
|
7948
8064
|
}
|
|
7949
8065
|
var date = createDate(dateString, timezone, format);
|
|
7950
8066
|
if (!date.isValid()) {
|
|
@@ -8018,13 +8134,13 @@ function requireMercury() {
|
|
|
8018
8134
|
acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
|
|
8019
8135
|
return acc;
|
|
8020
8136
|
}, {});
|
|
8021
|
-
var _Reflect$ownKeys$redu = _Reflect$
|
|
8137
|
+
var _Reflect$ownKeys$redu = _Reflect$ownKeys$1(termCounts).reduce(function (acc, key) {
|
|
8022
8138
|
if (acc[1] < termCounts[key]) {
|
|
8023
8139
|
return [key, termCounts[key]];
|
|
8024
8140
|
}
|
|
8025
8141
|
return acc;
|
|
8026
8142
|
}, [0, 0]),
|
|
8027
|
-
_Reflect$ownKeys$redu2 =
|
|
8143
|
+
_Reflect$ownKeys$redu2 = _slicedToArray$1(_Reflect$ownKeys$redu, 2),
|
|
8028
8144
|
maxTerm = _Reflect$ownKeys$redu2[0],
|
|
8029
8145
|
termCount = _Reflect$ownKeys$redu2[1];
|
|
8030
8146
|
|
|
@@ -8053,16 +8169,16 @@ function requireMercury() {
|
|
|
8053
8169
|
//
|
|
8054
8170
|
// Strip out the big TLDs - it just makes the matching a bit more
|
|
8055
8171
|
// accurate. Not the end of the world if it doesn't strip right.
|
|
8056
|
-
var _URL$parse =
|
|
8172
|
+
var _URL$parse = URL$1$1.parse(url),
|
|
8057
8173
|
host = _URL$parse.host;
|
|
8058
8174
|
var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
|
|
8059
8175
|
var startSlug = splitTitle[0].toLowerCase().replace(' ', '');
|
|
8060
|
-
var startSlugRatio =
|
|
8176
|
+
var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
|
|
8061
8177
|
if (startSlugRatio > 0.4 && startSlug.length > 5) {
|
|
8062
8178
|
return splitTitle.slice(2).join('');
|
|
8063
8179
|
}
|
|
8064
8180
|
var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
|
|
8065
|
-
var endSlugRatio =
|
|
8181
|
+
var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
|
|
8066
8182
|
if (endSlugRatio > 0.4 && endSlug.length >= 5) {
|
|
8067
8183
|
return splitTitle.slice(0, -2).join('');
|
|
8068
8184
|
}
|
|
@@ -8159,7 +8275,7 @@ function requireMercury() {
|
|
|
8159
8275
|
// First, look for special hNews based selectors and give them a big
|
|
8160
8276
|
// boost, if they exist
|
|
8161
8277
|
HNEWS_CONTENT_SELECTORS.forEach(function (_ref) {
|
|
8162
|
-
var _ref2 =
|
|
8278
|
+
var _ref2 = _slicedToArray$1(_ref, 2),
|
|
8163
8279
|
parentSelector = _ref2[0],
|
|
8164
8280
|
childSelector = _ref2[1];
|
|
8165
8281
|
$("".concat(parentSelector, " ").concat(childSelector)).each(function (index, node) {
|
|
@@ -8291,9 +8407,9 @@ function requireMercury() {
|
|
|
8291
8407
|
return $topCandidate;
|
|
8292
8408
|
}
|
|
8293
8409
|
function _createForOfIteratorHelper$2(r, e) {
|
|
8294
|
-
var t = "undefined" != typeof
|
|
8410
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8295
8411
|
if (!t) {
|
|
8296
|
-
if (_Array$
|
|
8412
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$2(r)) || e) {
|
|
8297
8413
|
t && (r = t);
|
|
8298
8414
|
var _n = 0,
|
|
8299
8415
|
F = function F() {};
|
|
@@ -8301,9 +8417,9 @@ function requireMercury() {
|
|
|
8301
8417
|
s: F,
|
|
8302
8418
|
n: function n() {
|
|
8303
8419
|
return _n >= r.length ? {
|
|
8304
|
-
done:
|
|
8420
|
+
done: true
|
|
8305
8421
|
} : {
|
|
8306
|
-
done:
|
|
8422
|
+
done: false,
|
|
8307
8423
|
value: r[_n++]
|
|
8308
8424
|
};
|
|
8309
8425
|
},
|
|
@@ -8316,8 +8432,8 @@ function requireMercury() {
|
|
|
8316
8432
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8317
8433
|
}
|
|
8318
8434
|
var o,
|
|
8319
|
-
a =
|
|
8320
|
-
u =
|
|
8435
|
+
a = true,
|
|
8436
|
+
u = false;
|
|
8321
8437
|
return {
|
|
8322
8438
|
s: function s() {
|
|
8323
8439
|
t = t.call(r);
|
|
@@ -8327,7 +8443,7 @@ function requireMercury() {
|
|
|
8327
8443
|
return a = r.done, r;
|
|
8328
8444
|
},
|
|
8329
8445
|
e: function e(r) {
|
|
8330
|
-
u =
|
|
8446
|
+
u = true, o = r;
|
|
8331
8447
|
},
|
|
8332
8448
|
f: function f() {
|
|
8333
8449
|
try {
|
|
@@ -8342,7 +8458,7 @@ function requireMercury() {
|
|
|
8342
8458
|
if (r) {
|
|
8343
8459
|
if ("string" == typeof r) return _arrayLikeToArray$2(r, a);
|
|
8344
8460
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8345
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8461
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$2(r, a) : void 0;
|
|
8346
8462
|
}
|
|
8347
8463
|
}
|
|
8348
8464
|
function _arrayLikeToArray$2(r, a) {
|
|
@@ -8351,11 +8467,11 @@ function requireMercury() {
|
|
|
8351
8467
|
return n;
|
|
8352
8468
|
}
|
|
8353
8469
|
function ownKeys$4(e, r) {
|
|
8354
|
-
var t = _Object$
|
|
8355
|
-
if (_Object$
|
|
8356
|
-
var o = _Object$
|
|
8470
|
+
var t = _Object$keys$1(e);
|
|
8471
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
8472
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
8357
8473
|
r && (o = o.filter(function (r) {
|
|
8358
|
-
return _Object$
|
|
8474
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
8359
8475
|
})), t.push.apply(t, o);
|
|
8360
8476
|
}
|
|
8361
8477
|
return t;
|
|
@@ -8363,10 +8479,10 @@ function requireMercury() {
|
|
|
8363
8479
|
function _objectSpread$4(e) {
|
|
8364
8480
|
for (var r = 1; r < arguments.length; r++) {
|
|
8365
8481
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
8366
|
-
r % 2 ? ownKeys$4(Object(t),
|
|
8367
|
-
|
|
8368
|
-
}) : _Object$
|
|
8369
|
-
_Object$
|
|
8482
|
+
r % 2 ? ownKeys$4(Object(t), true).forEach(function (r) {
|
|
8483
|
+
_defineProperty$1(e, r, t[r]);
|
|
8484
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$4(Object(t)).forEach(function (r) {
|
|
8485
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
8370
8486
|
});
|
|
8371
8487
|
}
|
|
8372
8488
|
return e;
|
|
@@ -8413,8 +8529,7 @@ function requireMercury() {
|
|
|
8413
8529
|
|
|
8414
8530
|
// We didn't succeed on first pass, one by one disable our
|
|
8415
8531
|
// extraction opts and try again.
|
|
8416
|
-
|
|
8417
|
-
var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys__default$1["default"](opts).filter(function (k) {
|
|
8532
|
+
var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys$1(opts).filter(function (k) {
|
|
8418
8533
|
return opts[k] === true;
|
|
8419
8534
|
})),
|
|
8420
8535
|
_step;
|
|
@@ -8541,9 +8656,9 @@ function requireMercury() {
|
|
|
8541
8656
|
var bylineRe = /^[\n\s]*By/i;
|
|
8542
8657
|
var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
|
|
8543
8658
|
function _createForOfIteratorHelper$1(r, e) {
|
|
8544
|
-
var t = "undefined" != typeof
|
|
8659
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8545
8660
|
if (!t) {
|
|
8546
|
-
if (_Array$
|
|
8661
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$1(r)) || e) {
|
|
8547
8662
|
t && (r = t);
|
|
8548
8663
|
var _n = 0,
|
|
8549
8664
|
F = function F() {};
|
|
@@ -8551,9 +8666,9 @@ function requireMercury() {
|
|
|
8551
8666
|
s: F,
|
|
8552
8667
|
n: function n() {
|
|
8553
8668
|
return _n >= r.length ? {
|
|
8554
|
-
done:
|
|
8669
|
+
done: true
|
|
8555
8670
|
} : {
|
|
8556
|
-
done:
|
|
8671
|
+
done: false,
|
|
8557
8672
|
value: r[_n++]
|
|
8558
8673
|
};
|
|
8559
8674
|
},
|
|
@@ -8566,8 +8681,8 @@ function requireMercury() {
|
|
|
8566
8681
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8567
8682
|
}
|
|
8568
8683
|
var o,
|
|
8569
|
-
a =
|
|
8570
|
-
u =
|
|
8684
|
+
a = true,
|
|
8685
|
+
u = false;
|
|
8571
8686
|
return {
|
|
8572
8687
|
s: function s() {
|
|
8573
8688
|
t = t.call(r);
|
|
@@ -8577,7 +8692,7 @@ function requireMercury() {
|
|
|
8577
8692
|
return a = r.done, r;
|
|
8578
8693
|
},
|
|
8579
8694
|
e: function e(r) {
|
|
8580
|
-
u =
|
|
8695
|
+
u = true, o = r;
|
|
8581
8696
|
},
|
|
8582
8697
|
f: function f() {
|
|
8583
8698
|
try {
|
|
@@ -8592,7 +8707,7 @@ function requireMercury() {
|
|
|
8592
8707
|
if (r) {
|
|
8593
8708
|
if ("string" == typeof r) return _arrayLikeToArray$1(r, a);
|
|
8594
8709
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8595
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8710
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$1(r, a) : void 0;
|
|
8596
8711
|
}
|
|
8597
8712
|
}
|
|
8598
8713
|
function _arrayLikeToArray$1(r, a) {
|
|
@@ -8621,12 +8736,11 @@ function requireMercury() {
|
|
|
8621
8736
|
|
|
8622
8737
|
// Last, use our looser regular-expression based selectors for
|
|
8623
8738
|
// potential authors.
|
|
8624
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
8625
8739
|
var _iterator = _createForOfIteratorHelper$1(BYLINE_SELECTORS_RE),
|
|
8626
8740
|
_step;
|
|
8627
8741
|
try {
|
|
8628
8742
|
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
8629
|
-
var _step$value =
|
|
8743
|
+
var _step$value = _slicedToArray$1(_step.value, 2),
|
|
8630
8744
|
selector = _step$value[0],
|
|
8631
8745
|
regex = _step$value[1];
|
|
8632
8746
|
var node = $(selector);
|
|
@@ -8781,8 +8895,8 @@ function requireMercury() {
|
|
|
8781
8895
|
}
|
|
8782
8896
|
function scoreByDimensions($img) {
|
|
8783
8897
|
var score = 0;
|
|
8784
|
-
var width =
|
|
8785
|
-
var height =
|
|
8898
|
+
var width = _parseFloat$1($img.attr('width'));
|
|
8899
|
+
var height = _parseFloat$1($img.attr('height'));
|
|
8786
8900
|
var src = $img.attr('src');
|
|
8787
8901
|
|
|
8788
8902
|
// Penalty for skinny images
|
|
@@ -8809,9 +8923,9 @@ function requireMercury() {
|
|
|
8809
8923
|
return $imgs.length / 2 - index;
|
|
8810
8924
|
}
|
|
8811
8925
|
function _createForOfIteratorHelper(r, e) {
|
|
8812
|
-
var t = "undefined" != typeof
|
|
8926
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8813
8927
|
if (!t) {
|
|
8814
|
-
if (_Array$
|
|
8928
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray(r)) || e) {
|
|
8815
8929
|
t && (r = t);
|
|
8816
8930
|
var _n = 0,
|
|
8817
8931
|
F = function F() {};
|
|
@@ -8819,9 +8933,9 @@ function requireMercury() {
|
|
|
8819
8933
|
s: F,
|
|
8820
8934
|
n: function n() {
|
|
8821
8935
|
return _n >= r.length ? {
|
|
8822
|
-
done:
|
|
8936
|
+
done: true
|
|
8823
8937
|
} : {
|
|
8824
|
-
done:
|
|
8938
|
+
done: false,
|
|
8825
8939
|
value: r[_n++]
|
|
8826
8940
|
};
|
|
8827
8941
|
},
|
|
@@ -8834,8 +8948,8 @@ function requireMercury() {
|
|
|
8834
8948
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8835
8949
|
}
|
|
8836
8950
|
var o,
|
|
8837
|
-
a =
|
|
8838
|
-
u =
|
|
8951
|
+
a = true,
|
|
8952
|
+
u = false;
|
|
8839
8953
|
return {
|
|
8840
8954
|
s: function s() {
|
|
8841
8955
|
t = t.call(r);
|
|
@@ -8845,7 +8959,7 @@ function requireMercury() {
|
|
|
8845
8959
|
return a = r.done, r;
|
|
8846
8960
|
},
|
|
8847
8961
|
e: function e(r) {
|
|
8848
|
-
u =
|
|
8962
|
+
u = true, o = r;
|
|
8849
8963
|
},
|
|
8850
8964
|
f: function f() {
|
|
8851
8965
|
try {
|
|
@@ -8860,7 +8974,7 @@ function requireMercury() {
|
|
|
8860
8974
|
if (r) {
|
|
8861
8975
|
if ("string" == typeof r) return _arrayLikeToArray(r, a);
|
|
8862
8976
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8863
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8977
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray(r, a) : void 0;
|
|
8864
8978
|
}
|
|
8865
8979
|
}
|
|
8866
8980
|
function _arrayLikeToArray(r, a) {
|
|
@@ -8916,10 +9030,10 @@ function requireMercury() {
|
|
|
8916
9030
|
score += scoreByPosition(imgs, index);
|
|
8917
9031
|
imgScores[src] = score;
|
|
8918
9032
|
});
|
|
8919
|
-
var _Reflect$ownKeys$redu = _Reflect$
|
|
9033
|
+
var _Reflect$ownKeys$redu = _Reflect$ownKeys$1(imgScores).reduce(function (acc, key) {
|
|
8920
9034
|
return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
|
|
8921
9035
|
}, [null, 0]),
|
|
8922
|
-
_Reflect$ownKeys$redu2 =
|
|
9036
|
+
_Reflect$ownKeys$redu2 = _slicedToArray$1(_Reflect$ownKeys$redu, 2),
|
|
8923
9037
|
topUrl = _Reflect$ownKeys$redu2[0],
|
|
8924
9038
|
topScore = _Reflect$ownKeys$redu2[1];
|
|
8925
9039
|
if (topScore > 0) {
|
|
@@ -8929,7 +9043,6 @@ function requireMercury() {
|
|
|
8929
9043
|
|
|
8930
9044
|
// If nothing else worked, check to see if there are any really
|
|
8931
9045
|
// probable nodes in the doc, like <link rel="image_src" />.
|
|
8932
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
8933
9046
|
var _iterator = _createForOfIteratorHelper(LEAD_IMAGE_URL_SELECTORS),
|
|
8934
9047
|
_step;
|
|
8935
9048
|
try {
|
|
@@ -8967,7 +9080,7 @@ function requireMercury() {
|
|
|
8967
9080
|
// sliding scale, subtract points from this link based on
|
|
8968
9081
|
// similarity.
|
|
8969
9082
|
if (score > 0) {
|
|
8970
|
-
var similarity = new
|
|
9083
|
+
var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();
|
|
8971
9084
|
// Subtract .1 from diff_percent when calculating modifier,
|
|
8972
9085
|
// which means that if it's less than 10% different, we give a
|
|
8973
9086
|
// bonus instead. Ex:
|
|
@@ -8987,7 +9100,7 @@ function requireMercury() {
|
|
|
8987
9100
|
// get scored, and sorted properly by score.
|
|
8988
9101
|
var score = 0;
|
|
8989
9102
|
if (IS_DIGIT_RE.test(linkText.trim())) {
|
|
8990
|
-
var linkTextAsNum =
|
|
9103
|
+
var linkTextAsNum = _parseInt$1(linkText, 10);
|
|
8991
9104
|
// If it's the first page, we already got it on the first call.
|
|
8992
9105
|
// Give it a negative score. Otherwise, up to page 10, give a
|
|
8993
9106
|
// small bonus.
|
|
@@ -9052,7 +9165,7 @@ function requireMercury() {
|
|
|
9052
9165
|
var positiveMatch = false;
|
|
9053
9166
|
var negativeMatch = false;
|
|
9054
9167
|
var score = 0;
|
|
9055
|
-
_Array$
|
|
9168
|
+
_Array$from$1(range(0, 4)).forEach(function () {
|
|
9056
9169
|
if ($parent.length === 0) {
|
|
9057
9170
|
return;
|
|
9058
9171
|
}
|
|
@@ -9100,7 +9213,7 @@ function requireMercury() {
|
|
|
9100
9213
|
return false;
|
|
9101
9214
|
}
|
|
9102
9215
|
var hostname = parsedUrl.hostname;
|
|
9103
|
-
var _URL$parse =
|
|
9216
|
+
var _URL$parse = URL$1$1.parse(href),
|
|
9104
9217
|
linkHost = _URL$parse.hostname;
|
|
9105
9218
|
|
|
9106
9219
|
// Domain mismatch.
|
|
@@ -9171,7 +9284,7 @@ function requireMercury() {
|
|
|
9171
9284
|
$ = _ref.$,
|
|
9172
9285
|
_ref$previousUrls = _ref.previousUrls,
|
|
9173
9286
|
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
|
|
9174
|
-
parsedUrl = parsedUrl ||
|
|
9287
|
+
parsedUrl = parsedUrl || URL$1$1.parse(articleUrl);
|
|
9175
9288
|
var baseRegex = makeBaseRegex(baseUrl);
|
|
9176
9289
|
var isWp = isWordpress($);
|
|
9177
9290
|
|
|
@@ -9222,7 +9335,7 @@ function requireMercury() {
|
|
|
9222
9335
|
possiblePage.score = score;
|
|
9223
9336
|
return possiblePages;
|
|
9224
9337
|
}, {});
|
|
9225
|
-
return _Reflect$
|
|
9338
|
+
return _Reflect$ownKeys$1(scoredPages).length === 0 ? null : scoredPages;
|
|
9226
9339
|
}
|
|
9227
9340
|
|
|
9228
9341
|
// Looks for and returns next page url
|
|
@@ -9234,7 +9347,7 @@ function requireMercury() {
|
|
|
9234
9347
|
parsedUrl = _ref.parsedUrl,
|
|
9235
9348
|
_ref$previousUrls = _ref.previousUrls,
|
|
9236
9349
|
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
|
|
9237
|
-
parsedUrl = parsedUrl ||
|
|
9350
|
+
parsedUrl = parsedUrl || URL$1$1.parse(url);
|
|
9238
9351
|
var articleUrl = removeAnchor(url);
|
|
9239
9352
|
var baseUrl = articleBaseUrl(url, parsedUrl);
|
|
9240
9353
|
var links = $('a[href]').toArray();
|
|
@@ -9252,7 +9365,7 @@ function requireMercury() {
|
|
|
9252
9365
|
|
|
9253
9366
|
// now that we've scored all possible pages,
|
|
9254
9367
|
// find the biggest one.
|
|
9255
|
-
var topPage = _Reflect$
|
|
9368
|
+
var topPage = _Reflect$ownKeys$1(scoredLinks).reduce(function (acc, link) {
|
|
9256
9369
|
var scoredLink = scoredLinks[link];
|
|
9257
9370
|
return scoredLink.score > acc.score ? scoredLink : acc;
|
|
9258
9371
|
}, {
|
|
@@ -9269,7 +9382,7 @@ function requireMercury() {
|
|
|
9269
9382
|
};
|
|
9270
9383
|
var CANONICAL_META_SELECTORS = ['og:url'];
|
|
9271
9384
|
function parseDomain(url) {
|
|
9272
|
-
var parsedUrl =
|
|
9385
|
+
var parsedUrl = URL$1$1.parse(url);
|
|
9273
9386
|
var hostname = parsedUrl.hostname;
|
|
9274
9387
|
return hostname;
|
|
9275
9388
|
}
|
|
@@ -9339,7 +9452,7 @@ function requireMercury() {
|
|
|
9339
9452
|
if (typeof str !== 'string' || str.length === 0) return '';
|
|
9340
9453
|
if (max === 0) return '';
|
|
9341
9454
|
opts = opts || {};
|
|
9342
|
-
_Object$
|
|
9455
|
+
_Object$keys$1(defaults).forEach(function (key) {
|
|
9343
9456
|
if (opts[key] === null || typeof opts[key] === 'undefined') {
|
|
9344
9457
|
opts[key] = defaults[key];
|
|
9345
9458
|
}
|
|
@@ -9392,11 +9505,11 @@ function requireMercury() {
|
|
|
9392
9505
|
}
|
|
9393
9506
|
};
|
|
9394
9507
|
function ownKeys$3(e, r) {
|
|
9395
|
-
var t = _Object$
|
|
9396
|
-
if (_Object$
|
|
9397
|
-
var o = _Object$
|
|
9508
|
+
var t = _Object$keys$1(e);
|
|
9509
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9510
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9398
9511
|
r && (o = o.filter(function (r) {
|
|
9399
|
-
return _Object$
|
|
9512
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9400
9513
|
})), t.push.apply(t, o);
|
|
9401
9514
|
}
|
|
9402
9515
|
return t;
|
|
@@ -9404,10 +9517,10 @@ function requireMercury() {
|
|
|
9404
9517
|
function _objectSpread$3(e) {
|
|
9405
9518
|
for (var r = 1; r < arguments.length; r++) {
|
|
9406
9519
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9407
|
-
r % 2 ? ownKeys$3(Object(t),
|
|
9408
|
-
|
|
9409
|
-
}) : _Object$
|
|
9410
|
-
_Object$
|
|
9520
|
+
r % 2 ? ownKeys$3(Object(t), true).forEach(function (r) {
|
|
9521
|
+
_defineProperty$1(e, r, t[r]);
|
|
9522
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$3(Object(t)).forEach(function (r) {
|
|
9523
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9411
9524
|
});
|
|
9412
9525
|
}
|
|
9413
9526
|
return e;
|
|
@@ -9427,7 +9540,7 @@ function requireMercury() {
|
|
|
9427
9540
|
word_count: GenericWordCountExtractor.extract,
|
|
9428
9541
|
direction: function direction(_ref) {
|
|
9429
9542
|
var title = _ref.title;
|
|
9430
|
-
return
|
|
9543
|
+
return stringDirection.getDirection(title);
|
|
9431
9544
|
},
|
|
9432
9545
|
extract: function extract(options) {
|
|
9433
9546
|
var html = options.html,
|
|
@@ -9482,24 +9595,24 @@ function requireMercury() {
|
|
|
9482
9595
|
'meta[name="generator"][value="blogger"]': BloggerExtractor
|
|
9483
9596
|
};
|
|
9484
9597
|
function detectByHtml($) {
|
|
9485
|
-
var selector = _Reflect$
|
|
9598
|
+
var selector = _Reflect$ownKeys$1(Detectors).find(function (s) {
|
|
9486
9599
|
return $(s).length > 0;
|
|
9487
9600
|
});
|
|
9488
9601
|
return Detectors[selector];
|
|
9489
9602
|
}
|
|
9490
9603
|
function getExtractor(url, parsedUrl, $) {
|
|
9491
|
-
parsedUrl = parsedUrl ||
|
|
9604
|
+
parsedUrl = parsedUrl || URL$1$1.parse(url);
|
|
9492
9605
|
var _parsedUrl = parsedUrl,
|
|
9493
9606
|
hostname = _parsedUrl.hostname;
|
|
9494
9607
|
var baseDomain = hostname.split('.').slice(-2).join('.');
|
|
9495
9608
|
return apiExtractors[hostname] || apiExtractors[baseDomain] || Extractors[hostname] || Extractors[baseDomain] || detectByHtml($) || GenericExtractor;
|
|
9496
9609
|
}
|
|
9497
9610
|
function ownKeys$2(e, r) {
|
|
9498
|
-
var t = _Object$
|
|
9499
|
-
if (_Object$
|
|
9500
|
-
var o = _Object$
|
|
9611
|
+
var t = _Object$keys$1(e);
|
|
9612
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9613
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9501
9614
|
r && (o = o.filter(function (r) {
|
|
9502
|
-
return _Object$
|
|
9615
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9503
9616
|
})), t.push.apply(t, o);
|
|
9504
9617
|
}
|
|
9505
9618
|
return t;
|
|
@@ -9507,10 +9620,10 @@ function requireMercury() {
|
|
|
9507
9620
|
function _objectSpread$2(e) {
|
|
9508
9621
|
for (var r = 1; r < arguments.length; r++) {
|
|
9509
9622
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9510
|
-
r % 2 ? ownKeys$2(Object(t),
|
|
9511
|
-
|
|
9512
|
-
}) : _Object$
|
|
9513
|
-
_Object$
|
|
9623
|
+
r % 2 ? ownKeys$2(Object(t), true).forEach(function (r) {
|
|
9624
|
+
_defineProperty$1(e, r, t[r]);
|
|
9625
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$2(Object(t)).forEach(function (r) {
|
|
9626
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9514
9627
|
});
|
|
9515
9628
|
}
|
|
9516
9629
|
return e;
|
|
@@ -9528,7 +9641,7 @@ function requireMercury() {
|
|
|
9528
9641
|
function transformElements($content, $, _ref2) {
|
|
9529
9642
|
var transforms = _ref2.transforms;
|
|
9530
9643
|
if (!transforms) return $content;
|
|
9531
|
-
_Reflect$
|
|
9644
|
+
_Reflect$ownKeys$1(transforms).forEach(function (key) {
|
|
9532
9645
|
var $matches = $(key, $content);
|
|
9533
9646
|
var value = transforms[key];
|
|
9534
9647
|
|
|
@@ -9552,13 +9665,13 @@ function requireMercury() {
|
|
|
9552
9665
|
}
|
|
9553
9666
|
function findMatchingSelector($, selectors, extractHtml, allowMultiple) {
|
|
9554
9667
|
return selectors.find(function (selector) {
|
|
9555
|
-
if (_Array$
|
|
9668
|
+
if (_Array$isArray$1(selector)) {
|
|
9556
9669
|
if (extractHtml) {
|
|
9557
9670
|
return selector.reduce(function (acc, s) {
|
|
9558
9671
|
return acc && $(s).length > 0;
|
|
9559
9672
|
}, true);
|
|
9560
9673
|
}
|
|
9561
|
-
var _selector =
|
|
9674
|
+
var _selector = _slicedToArray$1(selector, 2),
|
|
9562
9675
|
s = _selector[0],
|
|
9563
9676
|
attr = _selector[1];
|
|
9564
9677
|
return (allowMultiple || !allowMultiple && $(s).length === 1) && $(s).attr(attr) && $(s).attr(attr).trim() !== '';
|
|
@@ -9600,7 +9713,7 @@ function requireMercury() {
|
|
|
9600
9713
|
// multi-match selection, which allows the parser to choose several
|
|
9601
9714
|
// selectors to include in the result. Note that all selectors in the
|
|
9602
9715
|
// array must match in order for this selector to trigger
|
|
9603
|
-
if (_Array$
|
|
9716
|
+
if (_Array$isArray$1(matchingSelector)) {
|
|
9604
9717
|
$content = $(matchingSelector.join(','));
|
|
9605
9718
|
var $wrapper = $('<div></div>');
|
|
9606
9719
|
$content.each(function (_, element) {
|
|
@@ -9634,8 +9747,8 @@ function requireMercury() {
|
|
|
9634
9747
|
var result;
|
|
9635
9748
|
// if selector is an array (e.g., ['img', 'src']),
|
|
9636
9749
|
// extract the attr
|
|
9637
|
-
if (_Array$
|
|
9638
|
-
var _matchingSelector =
|
|
9750
|
+
if (_Array$isArray$1(matchingSelector)) {
|
|
9751
|
+
var _matchingSelector = _slicedToArray$1(matchingSelector, 3),
|
|
9639
9752
|
selector = _matchingSelector[0],
|
|
9640
9753
|
attr = _matchingSelector[1],
|
|
9641
9754
|
transform = _matchingSelector[2];
|
|
@@ -9652,7 +9765,7 @@ function requireMercury() {
|
|
|
9652
9765
|
return $(el).text().trim();
|
|
9653
9766
|
});
|
|
9654
9767
|
}
|
|
9655
|
-
result = _Array$
|
|
9768
|
+
result = _Array$isArray$1(result.toArray()) && allowMultiple ? result.toArray() : result[0];
|
|
9656
9769
|
// Allow custom extractor to skip default cleaner
|
|
9657
9770
|
// for this type; defaults to true
|
|
9658
9771
|
if (defaultCleaner && Cleaners[type]) {
|
|
@@ -9662,7 +9775,7 @@ function requireMercury() {
|
|
|
9662
9775
|
}
|
|
9663
9776
|
function selectExtendedTypes(extend, opts) {
|
|
9664
9777
|
var results = {};
|
|
9665
|
-
_Reflect$
|
|
9778
|
+
_Reflect$ownKeys$1(extend).forEach(function (t) {
|
|
9666
9779
|
if (!results[t]) {
|
|
9667
9780
|
results[t] = select(_objectSpread$2(_objectSpread$2({}, opts), {}, {
|
|
9668
9781
|
type: t,
|
|
@@ -9780,11 +9893,11 @@ function requireMercury() {
|
|
|
9780
9893
|
}
|
|
9781
9894
|
};
|
|
9782
9895
|
function ownKeys$1(e, r) {
|
|
9783
|
-
var t = _Object$
|
|
9784
|
-
if (_Object$
|
|
9785
|
-
var o = _Object$
|
|
9896
|
+
var t = _Object$keys$1(e);
|
|
9897
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9898
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9786
9899
|
r && (o = o.filter(function (r) {
|
|
9787
|
-
return _Object$
|
|
9900
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9788
9901
|
})), t.push.apply(t, o);
|
|
9789
9902
|
}
|
|
9790
9903
|
return t;
|
|
@@ -9792,10 +9905,10 @@ function requireMercury() {
|
|
|
9792
9905
|
function _objectSpread$1(e) {
|
|
9793
9906
|
for (var r = 1; r < arguments.length; r++) {
|
|
9794
9907
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9795
|
-
r % 2 ? ownKeys$1(Object(t),
|
|
9796
|
-
|
|
9797
|
-
}) : _Object$
|
|
9798
|
-
_Object$
|
|
9908
|
+
r % 2 ? ownKeys$1(Object(t), true).forEach(function (r) {
|
|
9909
|
+
_defineProperty$1(e, r, t[r]);
|
|
9910
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$1(Object(t)).forEach(function (r) {
|
|
9911
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9799
9912
|
});
|
|
9800
9913
|
}
|
|
9801
9914
|
return e;
|
|
@@ -9804,9 +9917,9 @@ function requireMercury() {
|
|
|
9804
9917
|
return _collectAllPages.apply(this, arguments);
|
|
9805
9918
|
}
|
|
9806
9919
|
function _collectAllPages() {
|
|
9807
|
-
_collectAllPages =
|
|
9920
|
+
_collectAllPages = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(_ref) {
|
|
9808
9921
|
var next_page_url, html, $, metaCache, result, Extractor, title, url, pages, previousUrls, extractorOpts, nextPageResult, word_count;
|
|
9809
|
-
return
|
|
9922
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
9810
9923
|
while (1) switch (_context.prev = _context.next) {
|
|
9811
9924
|
case 0:
|
|
9812
9925
|
next_page_url = _ref.next_page_url, html = _ref.html, $ = _ref.$, metaCache = _ref.metaCache, result = _ref.result, Extractor = _ref.Extractor, title = _ref.title, url = _ref.url;
|
|
@@ -9821,7 +9934,6 @@ function requireMercury() {
|
|
|
9821
9934
|
break;
|
|
9822
9935
|
}
|
|
9823
9936
|
pages += 1;
|
|
9824
|
-
// eslint-disable-next-line no-await-in-loop
|
|
9825
9937
|
_context.next = 2;
|
|
9826
9938
|
return Resource.create(next_page_url);
|
|
9827
9939
|
case 2:
|
|
@@ -9862,11 +9974,11 @@ function requireMercury() {
|
|
|
9862
9974
|
}
|
|
9863
9975
|
var _excluded = ["html"];
|
|
9864
9976
|
function ownKeys(e, r) {
|
|
9865
|
-
var t = _Object$
|
|
9866
|
-
if (_Object$
|
|
9867
|
-
var o = _Object$
|
|
9977
|
+
var t = _Object$keys$1(e);
|
|
9978
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9979
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9868
9980
|
r && (o = o.filter(function (r) {
|
|
9869
|
-
return _Object$
|
|
9981
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9870
9982
|
})), t.push.apply(t, o);
|
|
9871
9983
|
}
|
|
9872
9984
|
return t;
|
|
@@ -9874,10 +9986,10 @@ function requireMercury() {
|
|
|
9874
9986
|
function _objectSpread(e) {
|
|
9875
9987
|
for (var r = 1; r < arguments.length; r++) {
|
|
9876
9988
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9877
|
-
r % 2 ? ownKeys(Object(t),
|
|
9878
|
-
|
|
9879
|
-
}) : _Object$
|
|
9880
|
-
_Object$
|
|
9989
|
+
r % 2 ? ownKeys(Object(t), true).forEach(function (r) {
|
|
9990
|
+
_defineProperty$1(e, r, t[r]);
|
|
9991
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys(Object(t)).forEach(function (r) {
|
|
9992
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9881
9993
|
});
|
|
9882
9994
|
}
|
|
9883
9995
|
return e;
|
|
@@ -9885,12 +9997,12 @@ function requireMercury() {
|
|
|
9885
9997
|
var Parser = {
|
|
9886
9998
|
parse: function parse(url) {
|
|
9887
9999
|
var _arguments = arguments;
|
|
9888
|
-
return
|
|
10000
|
+
return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
9889
10001
|
var _ref, html, opts, _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, _opts$contentType, contentType, _opts$headers, headers, extend, customExtractor, parsedUrl, $, Extractor, metaCache, extendedTypes, result, _result, title, next_page_url, turndownService;
|
|
9890
|
-
return
|
|
10002
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
9891
10003
|
while (1) switch (_context.prev = _context.next) {
|
|
9892
10004
|
case 0:
|
|
9893
|
-
_ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts =
|
|
10005
|
+
_ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts = _objectWithoutProperties(_ref, _excluded);
|
|
9894
10006
|
_opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === void 0 ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === void 0 ? true : _opts$fallback, _opts$contentType = opts.contentType, contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType, _opts$headers = opts.headers, headers = _opts$headers === void 0 ? {} : _opts$headers, extend = opts.extend, customExtractor = opts.customExtractor; // if no url was passed and this is the browser version,
|
|
9895
10007
|
// set url to window.location.href and load the html
|
|
9896
10008
|
// from the current page
|
|
@@ -9898,7 +10010,7 @@ function requireMercury() {
|
|
|
9898
10010
|
url = window.location.href; // eslint-disable-line no-undef
|
|
9899
10011
|
html = html || document.documentElement.outerHTML; // eslint-disable-line no-undef
|
|
9900
10012
|
}
|
|
9901
|
-
parsedUrl =
|
|
10013
|
+
parsedUrl = URL$1$1.parse(url);
|
|
9902
10014
|
if (validateUrl(parsedUrl)) {
|
|
9903
10015
|
_context.next = 1;
|
|
9904
10016
|
break;
|
|
@@ -9978,7 +10090,7 @@ function requireMercury() {
|
|
|
9978
10090
|
});
|
|
9979
10091
|
case 6:
|
|
9980
10092
|
if (contentType === 'markdown') {
|
|
9981
|
-
turndownService = new
|
|
10093
|
+
turndownService = new TurndownService();
|
|
9982
10094
|
result.content = turndownService.turndown(result.content);
|
|
9983
10095
|
} else if (contentType === 'text') {
|
|
9984
10096
|
result.content = $.text($(result.content));
|
|
@@ -10034,7 +10146,7 @@ function template(strings) {
|
|
|
10034
10146
|
}
|
|
10035
10147
|
var compiled = insertValues.apply(void 0, [strings].concat(values));
|
|
10036
10148
|
var _ref = compiled.match(bodyPattern) || [],
|
|
10037
|
-
_ref2 =
|
|
10149
|
+
_ref2 = _slicedToArray(_ref, 1),
|
|
10038
10150
|
body = _ref2[0];
|
|
10039
10151
|
var indentLevel = /^\s{0,4}(.+)$/g;
|
|
10040
10152
|
if (!body) {
|
|
@@ -10052,7 +10164,7 @@ function template(strings) {
|
|
|
10052
10164
|
|
|
10053
10165
|
var _templateObject$1;
|
|
10054
10166
|
function extractorTemplate (hostname, name) {
|
|
10055
|
-
return template(_templateObject$1 || (_templateObject$1 =
|
|
10167
|
+
return template(_templateObject$1 || (_templateObject$1 = _taggedTemplateLiteral(["\n export const ", " = {\n domain: '", "',\n\n title: {\n selectors: [\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n // enter author selectors\n ],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ]\n },\n }\n "])), name, hostname);
|
|
10056
10168
|
}
|
|
10057
10169
|
|
|
10058
10170
|
var _templateObject, _templateObject2;
|
|
@@ -10061,10 +10173,10 @@ function testFor(key, value, dir) {
|
|
|
10061
10173
|
if (IGNORE.find(function (k) {
|
|
10062
10174
|
return k === key;
|
|
10063
10175
|
})) return '';
|
|
10064
|
-
return template(_templateObject || (_templateObject =
|
|
10176
|
+
return template(_templateObject || (_templateObject = _taggedTemplateLiteral(["\n it('returns the ", "', async () => {\n // To pass this test, fill out the ", " selector\n // in ", "/index.js.\n const { ", " } = await result\n\n // Update these values with the expected values from\n // the article.\n assert.strictEqual(", ", ", ")\n });\n "])), key, key, dir, key, key, value ? "`".concat(value, "`") : "''");
|
|
10065
10177
|
}
|
|
10066
10178
|
function extractorTestTemplate (file, url, dir, result, name) {
|
|
10067
|
-
return template(_templateObject2 || (_templateObject2 =
|
|
10179
|
+
return template(_templateObject2 || (_templateObject2 = _taggedTemplateLiteral(["\n import assert from 'assert';\n import * as cheerio from 'cheerio';\n\n import Parser from 'mercury';\n import getExtractor from 'extractors/get-extractor';\n import { excerptContent } from 'utils/text';\n\n const fs = require('fs');\n\n describe('", "', () => {\n describe('initial test case', () => {\n let result;\n let url;\n beforeAll(() => {\n url =\n '", "';\n const html =\n fs.readFileSync('", "');\n result =\n Parser.parse(url, { html, fallback: false });\n });\n\n it('is selected properly', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const extractor = getExtractor(url);\n assert.strictEqual(extractor.domain, new URL(url).hostname)\n })\n\n ", "\n\n it('returns the content', async () => {\n // To pass this test, fill out the content selector\n // in ", "/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const { content } = await result;\n\n const $ = cheerio.load(content || '');\n\n const first13 = excerptContent($('*').first().text(), 13)\n\n // Update these values with the expected values from the article.\n // Add the first 13 words of the article here\n assert.strictEqual(first13, null);\n });\n });\n });\n "])), name, url, file, _Reflect$ownKeys(result).map(function (k) {
|
|
10068
10180
|
return testFor(k, result[k], dir);
|
|
10069
10181
|
}).join('\n\n'), dir);
|
|
10070
10182
|
}
|
|
@@ -10082,11 +10194,11 @@ var questions = [{
|
|
|
10082
10194
|
}];
|
|
10083
10195
|
var spinner;
|
|
10084
10196
|
function confirm(fn, args, msg, newParser) {
|
|
10085
|
-
spinner =
|
|
10197
|
+
spinner = ora({
|
|
10086
10198
|
text: msg
|
|
10087
10199
|
});
|
|
10088
10200
|
spinner.start();
|
|
10089
|
-
var result = fn.apply(void 0,
|
|
10201
|
+
var result = fn.apply(void 0, _toConsumableArray(args));
|
|
10090
10202
|
if (result && result.then) {
|
|
10091
10203
|
result.then(function (r) {
|
|
10092
10204
|
if (r && r.error) {
|
|
@@ -10106,8 +10218,8 @@ function confirm(fn, args, msg, newParser) {
|
|
|
10106
10218
|
return result;
|
|
10107
10219
|
}
|
|
10108
10220
|
function confirmCreateDir(dir, msg) {
|
|
10109
|
-
if (!
|
|
10110
|
-
confirm(
|
|
10221
|
+
if (!fs.existsSync(dir)) {
|
|
10222
|
+
confirm(fs.mkdirSync, [dir], msg);
|
|
10111
10223
|
}
|
|
10112
10224
|
}
|
|
10113
10225
|
function getDir(url) {
|
|
@@ -10120,7 +10232,7 @@ function scaffoldCustomParser(url) {
|
|
|
10120
10232
|
var _URL3 = new URL(url),
|
|
10121
10233
|
hostname = _URL3.hostname;
|
|
10122
10234
|
var newParser = false;
|
|
10123
|
-
if (!
|
|
10235
|
+
if (!fs.existsSync(dir)) {
|
|
10124
10236
|
newParser = true;
|
|
10125
10237
|
confirmCreateDir(dir, "Creating ".concat(hostname, " directory"));
|
|
10126
10238
|
confirmCreateDir("./fixtures/".concat(hostname), 'Creating fixtures directory');
|
|
@@ -10133,7 +10245,7 @@ var urlArg = process.argv[2];
|
|
|
10133
10245
|
if (urlArg) {
|
|
10134
10246
|
scaffoldCustomParser(urlArg);
|
|
10135
10247
|
} else {
|
|
10136
|
-
|
|
10248
|
+
inquirer.prompt(questions).then(function (answers) {
|
|
10137
10249
|
scaffoldCustomParser(answers.website);
|
|
10138
10250
|
});
|
|
10139
10251
|
}
|
|
@@ -10142,13 +10254,13 @@ function generateScaffold(url, file, result) {
|
|
|
10142
10254
|
hostname = _URL4.hostname;
|
|
10143
10255
|
var extractor = extractorTemplate(hostname, extractorName(hostname));
|
|
10144
10256
|
var extractorTest = extractorTestTemplate(file, url, getDir(url), result, extractorName(hostname));
|
|
10145
|
-
|
|
10146
|
-
|
|
10147
|
-
|
|
10257
|
+
fs.writeFileSync("".concat(getDir(url), "/index.js"), extractor);
|
|
10258
|
+
fs.writeFileSync("".concat(getDir(url), "/index.test.js"), extractorTest);
|
|
10259
|
+
fs.appendFileSync('./src/extractors/custom/index.js', exportString(url));
|
|
10148
10260
|
child_process.exec("npm run lint-fix-quiet -- ".concat(getDir(url), "/*.js"));
|
|
10149
10261
|
}
|
|
10150
10262
|
function savePage($, _ref, newParser) {
|
|
10151
|
-
var _ref2 =
|
|
10263
|
+
var _ref2 = _slicedToArray(_ref, 1),
|
|
10152
10264
|
url = _ref2[0];
|
|
10153
10265
|
var _URL5 = new URL(url),
|
|
10154
10266
|
hostname = _URL5.hostname;
|
|
@@ -10165,7 +10277,7 @@ function savePage($, _ref, newParser) {
|
|
|
10165
10277
|
}
|
|
10166
10278
|
});
|
|
10167
10279
|
var html = stripJunkTags($('*').first(), $, ['script']).html();
|
|
10168
|
-
|
|
10280
|
+
fs.writeFileSync(file, html);
|
|
10169
10281
|
Parser.parse(url, {
|
|
10170
10282
|
html: html
|
|
10171
10283
|
}).then(function (result) {
|