magpie-html 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { Readability } from '@mozilla/readability';
2
2
  import { parseHTML as parseHTML$1 } from 'linkedom';
3
3
  import vm4 from 'vm';
4
- import { setTimeout as setTimeout$1, clearTimeout as clearTimeout$1, setInterval, clearInterval, setImmediate, clearImmediate } from 'timers';
4
+ import { setTimeout, clearTimeout, setInterval, clearInterval, setImmediate, clearImmediate } from 'timers';
5
5
 
6
6
  // src/content/quality.ts
7
7
  function countWords(text) {
@@ -832,6 +832,32 @@ function parseAtomDate(dateString) {
832
832
  }
833
833
 
834
834
  // src/feed/atom/extract-entry.ts
835
+ function extractAtomDate(element) {
836
+ let dateText = element.querySelector("updated")?.textContent;
837
+ if (dateText) {
838
+ const parsed = parseAtomDate(dateText);
839
+ if (parsed) return parsed;
840
+ }
841
+ dateText = element.querySelector("modified")?.textContent;
842
+ if (dateText) {
843
+ const parsed = parseAtomDate(dateText);
844
+ if (parsed) return parsed;
845
+ }
846
+ dateText = element.querySelector("issued")?.textContent;
847
+ if (dateText) {
848
+ const parsed = parseAtomDate(dateText);
849
+ if (parsed) return parsed;
850
+ }
851
+ const dcDateElements = element.children.filter((child) => child.tagName === "dc:date");
852
+ if (dcDateElements.length > 0) {
853
+ dateText = dcDateElements[0].textContent;
854
+ if (dateText) {
855
+ const parsed = parseAtomDate(dateText);
856
+ if (parsed) return parsed;
857
+ }
858
+ }
859
+ return null;
860
+ }
835
861
  function extractPerson(element) {
836
862
  const name = element.querySelector("name")?.textContent;
837
863
  if (!name) {
@@ -974,13 +1000,11 @@ function extractEntry(entryElement) {
974
1000
  if (!title) {
975
1001
  throw new Error("Invalid Atom entry: missing required <title> element");
976
1002
  }
977
- const updatedRaw = entryElement.querySelector("updated")?.textContent;
978
- if (!updatedRaw) {
979
- throw new Error("Invalid Atom entry: missing required <updated> element");
980
- }
981
- const updated = parseAtomDate(updatedRaw);
1003
+ const updated = extractAtomDate(entryElement);
982
1004
  if (!updated) {
983
- throw new Error("Invalid Atom entry: invalid <updated> date");
1005
+ throw new Error(
1006
+ "Invalid Atom entry: missing or invalid date (tried <updated>, <modified>, <issued>, <dc:date>)"
1007
+ );
984
1008
  }
985
1009
  const entry = {
986
1010
  id: cleanText(id),
@@ -1230,6 +1254,32 @@ function parseXML(xml) {
1230
1254
  }
1231
1255
 
1232
1256
  // src/feed/atom/extract-feed.ts
1257
+ function extractAtomDate2(element) {
1258
+ let dateText = element.querySelector("updated")?.textContent;
1259
+ if (dateText) {
1260
+ const parsed = parseAtomDate(dateText);
1261
+ if (parsed) return parsed;
1262
+ }
1263
+ dateText = element.querySelector("modified")?.textContent;
1264
+ if (dateText) {
1265
+ const parsed = parseAtomDate(dateText);
1266
+ if (parsed) return parsed;
1267
+ }
1268
+ dateText = element.querySelector("issued")?.textContent;
1269
+ if (dateText) {
1270
+ const parsed = parseAtomDate(dateText);
1271
+ if (parsed) return parsed;
1272
+ }
1273
+ const dcDateElements = element.children.filter((child) => child.tagName === "dc:date");
1274
+ if (dcDateElements.length > 0) {
1275
+ dateText = dcDateElements[0].textContent;
1276
+ if (dateText) {
1277
+ const parsed = parseAtomDate(dateText);
1278
+ if (parsed) return parsed;
1279
+ }
1280
+ }
1281
+ return null;
1282
+ }
1233
1283
  function extractPerson2(element) {
1234
1284
  const name = element.querySelector("name")?.textContent;
1235
1285
  if (!name) {
@@ -1377,13 +1427,11 @@ function extractFeed(xml) {
1377
1427
  if (!title) {
1378
1428
  throw new Error("Invalid Atom feed: missing required <title> element");
1379
1429
  }
1380
- const updatedRaw = feed.querySelector("updated")?.textContent;
1381
- if (!updatedRaw) {
1382
- throw new Error("Invalid Atom feed: missing required <updated> element");
1383
- }
1384
- const updated = parseAtomDate(updatedRaw);
1430
+ const updated = extractAtomDate2(feed);
1385
1431
  if (!updated) {
1386
- throw new Error("Invalid Atom feed: invalid <updated> date");
1432
+ throw new Error(
1433
+ "Invalid Atom feed: missing or invalid date (tried <updated>, <modified>, <issued>, <dc:date>)"
1434
+ );
1387
1435
  }
1388
1436
  const result = {
1389
1437
  id: cleanText(id),
@@ -2445,13 +2493,12 @@ async function pluck(input, init) {
2445
2493
  const startTime = Date.now();
2446
2494
  const options = normalizeOptions2(init);
2447
2495
  const originalUrl = typeof input === "string" || input instanceof URL ? String(input) : input.url;
2448
- const abortController = new AbortController();
2449
- const timeoutId = setTimeout(() => abortController.abort(), options.timeout);
2496
+ const signal = AbortSignal.timeout(options.timeout);
2450
2497
  try {
2451
2498
  const { response, redirectChain, redirectDuration } = await followRedirects(
2452
2499
  input,
2453
2500
  options,
2454
- abortController.signal
2501
+ signal
2455
2502
  );
2456
2503
  const finalUrl = response.url;
2457
2504
  if (options.throwOnHttpError && !response.ok) {
@@ -2488,15 +2535,13 @@ async function pluck(input, init) {
2488
2535
  if (error instanceof PluckTimeoutError || error instanceof PluckNetworkError) {
2489
2536
  throw error;
2490
2537
  }
2491
- if (error.name === "AbortError") {
2538
+ if (error.name === "TimeoutError") {
2492
2539
  throw new PluckTimeoutError(`Request timeout after ${options.timeout}ms`, options.timeout);
2493
2540
  }
2494
2541
  if (error instanceof TypeError) {
2495
2542
  throw new PluckNetworkError(`Network error: ${error.message}`, error);
2496
2543
  }
2497
2544
  throw error;
2498
- } finally {
2499
- clearTimeout(timeoutId);
2500
2545
  }
2501
2546
  }
2502
2547
  function normalizeOptions2(init) {
@@ -5111,8 +5156,8 @@ ${err.stack}` : ""}`.trim());
5111
5156
  }
5112
5157
  function installAsyncEnv(init) {
5113
5158
  const { globalObj } = init;
5114
- const hostSetTimeout = setTimeout$1;
5115
- const hostClearTimeout = clearTimeout$1;
5159
+ const hostSetTimeout = setTimeout;
5160
+ const hostClearTimeout = clearTimeout;
5116
5161
  const hostSetInterval = setInterval;
5117
5162
  const hostClearInterval = clearInterval;
5118
5163
  const hostSetImmediate = setImmediate;
@@ -6211,7 +6256,7 @@ function isNodeRuntime() {
6211
6256
  return typeof process !== "undefined" && typeof process.versions === "object" && typeof process.versions.node === "string";
6212
6257
  }
6213
6258
  function sleep(ms) {
6214
- return new Promise((resolve) => setTimeout$1(resolve, ms));
6259
+ return new Promise((resolve) => setTimeout(resolve, ms));
6215
6260
  }
6216
6261
  function normalizeInit(init) {
6217
6262
  return {