magpie-html 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +63 -18
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +67 -22
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Readability } from '@mozilla/readability';
|
|
2
2
|
import { parseHTML as parseHTML$1 } from 'linkedom';
|
|
3
3
|
import vm4 from 'vm';
|
|
4
|
-
import { setTimeout
|
|
4
|
+
import { setTimeout, clearTimeout, setInterval, clearInterval, setImmediate, clearImmediate } from 'timers';
|
|
5
5
|
|
|
6
6
|
// src/content/quality.ts
|
|
7
7
|
function countWords(text) {
|
|
@@ -832,6 +832,32 @@ function parseAtomDate(dateString) {
|
|
|
832
832
|
}
|
|
833
833
|
|
|
834
834
|
// src/feed/atom/extract-entry.ts
|
|
835
|
+
/**
 * Find the first parseable date on an Atom entry element.
 *
 * Lookup order: <updated>, <modified>, <issued> (each resolved through
 * `element.querySelector`), then every direct child whose `tagName` is
 * exactly "dc:date". The first candidate whose text `parseAtomDate`
 * accepts wins.
 *
 * @param {object} element - Parsed element exposing `querySelector`,
 *   `children`, and children with `tagName` / `textContent`.
 * @returns {*} The truthy value returned by `parseAtomDate` for the first
 *   usable candidate, or `null` when no candidate is present or parseable.
 */
function extractAtomDate(element) {
  const fallbackTags = ["updated", "modified", "issued"];
  for (const tagName of fallbackTags) {
    const dateText = element.querySelector(tagName)?.textContent;
    if (dateText) {
      const parsed = parseAtomDate(dateText);
      if (parsed) return parsed;
    }
  }
  // "dc:date" is matched by tagName on direct children rather than through
  // querySelector. Array.from lets `children` be any array-like or iterable
  // collection, not only a real Array.
  for (const child of Array.from(element.children ?? [])) {
    if (child.tagName !== "dc:date") continue;
    const dateText = child.textContent;
    if (!dateText) continue;
    // Keep scanning: an empty or unparseable first dc:date must not hide a
    // valid later one.
    const parsed = parseAtomDate(dateText);
    if (parsed) return parsed;
  }
  return null;
}
|
|
835
861
|
function extractPerson(element) {
|
|
836
862
|
const name = element.querySelector("name")?.textContent;
|
|
837
863
|
if (!name) {
|
|
@@ -974,13 +1000,11 @@ function extractEntry(entryElement) {
|
|
|
974
1000
|
if (!title) {
|
|
975
1001
|
throw new Error("Invalid Atom entry: missing required <title> element");
|
|
976
1002
|
}
|
|
977
|
-
const
|
|
978
|
-
if (!updatedRaw) {
|
|
979
|
-
throw new Error("Invalid Atom entry: missing required <updated> element");
|
|
980
|
-
}
|
|
981
|
-
const updated = parseAtomDate(updatedRaw);
|
|
1003
|
+
const updated = extractAtomDate(entryElement);
|
|
982
1004
|
if (!updated) {
|
|
983
|
-
throw new Error(
|
|
1005
|
+
throw new Error(
|
|
1006
|
+
"Invalid Atom entry: missing or invalid date (tried <updated>, <modified>, <issued>, <dc:date>)"
|
|
1007
|
+
);
|
|
984
1008
|
}
|
|
985
1009
|
const entry = {
|
|
986
1010
|
id: cleanText(id),
|
|
@@ -1230,6 +1254,32 @@ function parseXML(xml) {
|
|
|
1230
1254
|
}
|
|
1231
1255
|
|
|
1232
1256
|
// src/feed/atom/extract-feed.ts
|
|
1257
|
+
/**
 * Find the first parseable date on an Atom feed element (the copy bundled
 * from src/feed/atom/extract-feed.ts).
 *
 * Lookup order: <updated>, <modified>, <issued> (each resolved through
 * `element.querySelector`), then every direct child whose `tagName` is
 * exactly "dc:date". The first candidate whose text `parseAtomDate`
 * accepts wins.
 *
 * @param {object} element - Parsed element exposing `querySelector`,
 *   `children`, and children with `tagName` / `textContent`.
 * @returns {*} The truthy value returned by `parseAtomDate` for the first
 *   usable candidate, or `null` when no candidate is present or parseable.
 */
function extractAtomDate2(element) {
  const fallbackTags = ["updated", "modified", "issued"];
  for (const tagName of fallbackTags) {
    const dateText = element.querySelector(tagName)?.textContent;
    if (dateText) {
      const parsed = parseAtomDate(dateText);
      if (parsed) return parsed;
    }
  }
  // "dc:date" is matched by tagName on direct children rather than through
  // querySelector. Array.from lets `children` be any array-like or iterable
  // collection, not only a real Array.
  for (const child of Array.from(element.children ?? [])) {
    if (child.tagName !== "dc:date") continue;
    const dateText = child.textContent;
    if (!dateText) continue;
    // Keep scanning: an empty or unparseable first dc:date must not hide a
    // valid later one.
    const parsed = parseAtomDate(dateText);
    if (parsed) return parsed;
  }
  return null;
}
|
|
1233
1283
|
function extractPerson2(element) {
|
|
1234
1284
|
const name = element.querySelector("name")?.textContent;
|
|
1235
1285
|
if (!name) {
|
|
@@ -1377,13 +1427,11 @@ function extractFeed(xml) {
|
|
|
1377
1427
|
if (!title) {
|
|
1378
1428
|
throw new Error("Invalid Atom feed: missing required <title> element");
|
|
1379
1429
|
}
|
|
1380
|
-
const
|
|
1381
|
-
if (!updatedRaw) {
|
|
1382
|
-
throw new Error("Invalid Atom feed: missing required <updated> element");
|
|
1383
|
-
}
|
|
1384
|
-
const updated = parseAtomDate(updatedRaw);
|
|
1430
|
+
const updated = extractAtomDate2(feed);
|
|
1385
1431
|
if (!updated) {
|
|
1386
|
-
throw new Error(
|
|
1432
|
+
throw new Error(
|
|
1433
|
+
"Invalid Atom feed: missing or invalid date (tried <updated>, <modified>, <issued>, <dc:date>)"
|
|
1434
|
+
);
|
|
1387
1435
|
}
|
|
1388
1436
|
const result = {
|
|
1389
1437
|
id: cleanText(id),
|
|
@@ -2445,13 +2493,12 @@ async function pluck(input, init) {
|
|
|
2445
2493
|
const startTime = Date.now();
|
|
2446
2494
|
const options = normalizeOptions2(init);
|
|
2447
2495
|
const originalUrl = typeof input === "string" || input instanceof URL ? String(input) : input.url;
|
|
2448
|
-
const
|
|
2449
|
-
const timeoutId = setTimeout(() => abortController.abort(), options.timeout);
|
|
2496
|
+
const signal = AbortSignal.timeout(options.timeout);
|
|
2450
2497
|
try {
|
|
2451
2498
|
const { response, redirectChain, redirectDuration } = await followRedirects(
|
|
2452
2499
|
input,
|
|
2453
2500
|
options,
|
|
2454
|
-
|
|
2501
|
+
signal
|
|
2455
2502
|
);
|
|
2456
2503
|
const finalUrl = response.url;
|
|
2457
2504
|
if (options.throwOnHttpError && !response.ok) {
|
|
@@ -2488,15 +2535,13 @@ async function pluck(input, init) {
|
|
|
2488
2535
|
if (error instanceof PluckTimeoutError || error instanceof PluckNetworkError) {
|
|
2489
2536
|
throw error;
|
|
2490
2537
|
}
|
|
2491
|
-
if (error.name === "
|
|
2538
|
+
if (error.name === "TimeoutError") {
|
|
2492
2539
|
throw new PluckTimeoutError(`Request timeout after ${options.timeout}ms`, options.timeout);
|
|
2493
2540
|
}
|
|
2494
2541
|
if (error instanceof TypeError) {
|
|
2495
2542
|
throw new PluckNetworkError(`Network error: ${error.message}`, error);
|
|
2496
2543
|
}
|
|
2497
2544
|
throw error;
|
|
2498
|
-
} finally {
|
|
2499
|
-
clearTimeout(timeoutId);
|
|
2500
2545
|
}
|
|
2501
2546
|
}
|
|
2502
2547
|
function normalizeOptions2(init) {
|
|
@@ -5111,8 +5156,8 @@ ${err.stack}` : ""}`.trim());
|
|
|
5111
5156
|
}
|
|
5112
5157
|
function installAsyncEnv(init) {
|
|
5113
5158
|
const { globalObj } = init;
|
|
5114
|
-
const hostSetTimeout = setTimeout
|
|
5115
|
-
const hostClearTimeout = clearTimeout
|
|
5159
|
+
const hostSetTimeout = setTimeout;
|
|
5160
|
+
const hostClearTimeout = clearTimeout;
|
|
5116
5161
|
const hostSetInterval = setInterval;
|
|
5117
5162
|
const hostClearInterval = clearInterval;
|
|
5118
5163
|
const hostSetImmediate = setImmediate;
|
|
@@ -6211,7 +6256,7 @@ function isNodeRuntime() {
|
|
|
6211
6256
|
return typeof process !== "undefined" && typeof process.versions === "object" && typeof process.versions.node === "string";
|
|
6212
6257
|
}
|
|
6213
6258
|
function sleep(ms) {
|
|
6214
|
-
return new Promise((resolve) => setTimeout
|
|
6259
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
6215
6260
|
}
|
|
6216
6261
|
function normalizeInit(init) {
|
|
6217
6262
|
return {
|