magpie-html 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2353,6 +2353,378 @@ function parseFeedAs(content, format, baseUrl) {
2353
2353
  }
2354
2354
  }
2355
2355
 
2356
+ // src/feed/sitemap/xml-parser.ts
2357
/**
 * Parse a sitemap XML string into an element tree.
 *
 * The XML declaration, DOCTYPE and comments are stripped (in that order)
 * before the first element found in the remaining text is parsed.
 *
 * @param {string} xml - Raw sitemap XML.
 * @returns {object} The element produced by `parseElement3` for the first tag.
 * @throws {Error} Propagates errors thrown by `parseElement3`.
 */
function parseSitemapXML(xml) {
  const preprocessed = removeComments3(removeDoctype3(cleanXMLDeclaration2(xml)));
  const { element } = parseElement3(preprocessed, 0);
  return element;
}
2364
/**
 * Remove every `<?xml ... ?>` declaration from the input and trim the result.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string} The text without XML declarations, trimmed at both ends.
 */
function cleanXMLDeclaration2(xml) {
  const declarationPattern = /<\?xml[^?]*\?>/g;
  const withoutDeclaration = xml.replace(declarationPattern, "");
  return withoutDeclaration.trim();
}
2367
/**
 * Strip `<!DOCTYPE ...>` declarations (matched case-insensitively, up to the
 * first `>`).
 *
 * @param {string} xml - XML text.
 * @returns {string} The text with DOCTYPE declarations removed.
 */
function removeDoctype3(xml) {
  const doctypePattern = /<!DOCTYPE[^>]*>/gi;
  return xml.replace(doctypePattern, "");
}
2370
/**
 * Strip `<!-- ... -->` comments, including comments that span several lines.
 *
 * @param {string} xml - XML text.
 * @returns {string} The text with comments removed.
 */
function removeComments3(xml) {
  const commentPattern = /<!--[\s\S]*?-->/g;
  return xml.replace(commentPattern, "");
}
2373
/**
 * Replace every CDATA section with a numbered placeholder.
 *
 * Placeholders have the form `__CDATA_<n>__`, numbered from 0 in document
 * order; the section's inner content is stored under the placeholder.
 *
 * @param {string} text - XML text that may contain CDATA sections.
 * @returns {{ text: string, cdataMap: Map<string, string> }} The rewritten
 *   text and the placeholder-to-content map.
 */
function extractCDATA3(text) {
  const cdataMap = /* @__PURE__ */ new Map();
  const cdataPattern = /<!\[CDATA\[([\s\S]*?)\]\]>/g;
  const stashSection = (_section, inner) => {
    // Placeholders are unique, so the map size doubles as the running counter.
    const key = `__CDATA_${cdataMap.size}__`;
    cdataMap.set(key, inner);
    return key;
  };
  return { text: text.replace(cdataPattern, stashSection), cdataMap };
}
2384
/**
 * Substitute CDATA placeholders in `text` with their original content.
 *
 * For each map entry, the first occurrence of the placeholder is replaced.
 *
 * @param {string} text - Text containing placeholders.
 * @param {Map<string, string>} cdataMap - Placeholder-to-content map.
 * @returns {string} The text with CDATA content restored verbatim.
 */
function restoreCDATA3(text, cdataMap) {
  let result = text;
  for (const [placeholder, content] of cdataMap) {
    // A replacer function is used so `$&`, `$$`, "$`" and "$'" inside the
    // CDATA content are inserted literally instead of being interpreted as
    // replacement patterns.
    result = result.replace(placeholder, () => content);
  }
  return result;
}
2391
/**
 * Parse `name="value"` / `name='value'` pairs out of an opening tag's content.
 *
 * The closing quote must be the same character as the opening quote, so a
 * value may contain the other quote character (for example `title="it's"`).
 * Whitespace around `=` is accepted. Values are returned as written; entity
 * references are not decoded.
 *
 * @param {string} tagContent - Text following the tag name.
 * @returns {Object<string, string>} Attribute names mapped to raw values.
 */
function parseAttributes3(tagContent) {
  const attributes = {};
  const attrRegex = /([^\s=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const match of tagContent.matchAll(attrRegex)) {
    // Exactly one of the two value groups participates in a match.
    attributes[match[1]] = match[2] ?? match[3];
  }
  return attributes;
}
2401
/**
 * Find the index of the closing tag that matches an already-opened element.
 *
 * Nested elements with the same tag name are tracked with a depth counter.
 * Only genuine nested openings count: the text after `<tagName` must be
 * whitespace, `/` or `>` (so `<a` does not match `<ab>`), and self-closing
 * occurrences such as `<a/>` are ignored because they have no closing tag.
 *
 * @param {string} xml - Text being scanned.
 * @param {string} tagName - Name of the element whose closing tag is wanted.
 * @param {number} startPos - Index just past the element's opening tag.
 * @returns {number} Index of the matching `</tagName>`, or -1 if none.
 */
function findClosingTag3(xml, tagName, startPos) {
  const openTag = `<${tagName}`;
  const closeTag = `</${tagName}>`;

  // Locate the next nested, non-self-closing opening of the same tag that
  // starts before `limit`; returns -1 when there is none.
  const findNestedOpen = (from, limit) => {
    let idx = xml.indexOf(openTag, from);
    while (idx !== -1 && idx < limit) {
      const after = xml[idx + openTag.length];
      const isExactName = after === ">" || after === "/" || (after !== undefined && /\s/.test(after));
      if (isExactName) {
        const tagEnd = xml.indexOf(">", idx);
        const isSelfClosing = tagEnd !== -1 && xml[tagEnd - 1] === "/";
        if (!isSelfClosing) {
          return idx;
        }
      }
      idx = xml.indexOf(openTag, idx + openTag.length);
    }
    return -1;
  };

  let depth = 1;
  let pos = startPos;
  while (pos < xml.length) {
    const nextClose = xml.indexOf(closeTag, pos);
    if (nextClose === -1) {
      return -1;
    }
    const nextOpen = findNestedOpen(pos, nextClose);
    if (nextOpen !== -1) {
      depth++;
      pos = nextOpen + openTag.length;
    } else {
      depth--;
      if (depth === 0) {
        return nextClose;
      }
      pos = nextClose + closeTag.length;
    }
  }
  return -1;
}
2425
/**
 * Parse the first element found at or after `startPos` into a tree node.
 *
 * On the outermost call (no `cdataMap` supplied) CDATA sections are replaced
 * by placeholders first; recursive calls reuse that map so placeholders are
 * restored when an element's text is computed.
 *
 * The tag name ends at the first whitespace character of the opening tag
 * (space, tab or newline), so attributes placed on a following line do not
 * become part of the name.
 *
 * @param {string} xml - Text to parse.
 * @param {number} startPos - Index from which to look for the opening `<`.
 * @param {object|null} [parent=null] - Node stored as the element's `parent`.
 * @param {Map<string, string>} [cdataMap] - Placeholder map from an outer call.
 * @returns {{ element: object, endPos: number, cdataMap: Map<string, string> }}
 *   The parsed element (`tagName`, `attributes`, `text`, `children`,
 *   `parent`), the index just past it, and the CDATA map in use.
 * @throws {Error} If no opening tag, no `>` for it, or no closing tag is found.
 */
function parseElement3(xml, startPos, parent = null, cdataMap) {
  const extracted = cdataMap ? { text: xml, cdataMap } : extractCDATA3(xml);
  const cleanedXML = extracted.text;
  const currentCdataMap = extracted.cdataMap;
  const openTagStart = cleanedXML.indexOf("<", startPos);
  if (openTagStart === -1) {
    throw new Error("No opening tag found");
  }
  const openTagEnd = cleanedXML.indexOf(">", openTagStart);
  if (openTagEnd === -1) {
    throw new Error("Unclosed opening tag");
  }
  const openTagContent = cleanedXML.substring(openTagStart + 1, openTagEnd);
  const isSelfClosing = openTagContent.endsWith("/");
  const tagContent = isSelfClosing ? openTagContent.slice(0, -1).trim() : openTagContent;
  // Any whitespace separates the name from the attributes, not only " ".
  const nameEnd = tagContent.search(/\s/);
  const tagName = nameEnd === -1 ? tagContent : tagContent.substring(0, nameEnd);
  const attributes = nameEnd === -1 ? {} : parseAttributes3(tagContent.substring(nameEnd));
  const element = {
    tagName,
    attributes,
    text: "",
    children: [],
    parent
  };
  if (isSelfClosing) {
    return { element, endPos: openTagEnd + 1, cdataMap: currentCdataMap };
  }
  const closingTagPos = findClosingTag3(cleanedXML, tagName, openTagEnd + 1);
  if (closingTagPos === -1) {
    throw new Error(`No closing tag found for <${tagName}>`);
  }
  const content = cleanedXML.substring(openTagEnd + 1, closingTagPos);
  if (content.includes("<")) {
    let pos = 0;
    const trimmedContent = content.trim();
    while (pos < trimmedContent.length) {
      const nextTag = trimmedContent.indexOf("<", pos);
      if (nextTag === -1) break;
      // Closing tags and `<!...` constructs are not child elements; step past.
      if (trimmedContent[nextTag + 1] === "/" || trimmedContent[nextTag + 1] === "!") {
        pos = nextTag + 1;
        continue;
      }
      try {
        const { element: child, endPos } = parseElement3(
          trimmedContent,
          nextTag,
          element,
          currentCdataMap
        );
        element.children.push(child);
        pos = endPos;
      } catch {
        // Best-effort parsing: a child that cannot be parsed is skipped and
        // scanning resumes one character further on.
        pos = nextTag + 1;
      }
    }
    // Text of an element with children is its content with all tags removed.
    let textContent = content.replace(/<[^>]+>/g, "").trim();
    textContent = restoreCDATA3(textContent, currentCdataMap);
    element.text = textContent;
  } else {
    let textContent = content.trim();
    textContent = restoreCDATA3(textContent, currentCdataMap);
    element.text = textContent;
  }
  const closingTagEnd = closingTagPos + `</${tagName}>`.length;
  return { element, endPos: closingTagEnd, cdataMap: currentCdataMap };
}
2492
/**
 * Return the first node, in depth-first pre-order starting at `element`
 * itself, whose tag name equals `selector`.
 *
 * @param {object} element - Root node to search from (included in the search).
 * @param {string} selector - Tag name to look for.
 * @param {boolean} [caseSensitive=false] - Compare names case-sensitively.
 * @returns {object|null} The first matching node, or null.
 */
function querySelector3(element, selector, caseSensitive = false) {
  const normalize = caseSensitive ? (name) => name : (name) => name.toLowerCase();
  const wanted = normalize(selector);
  const pending = [element];
  while (pending.length > 0) {
    const node = pending.pop();
    if (normalize(node.tagName) === wanted) {
      return node;
    }
    // Push children in reverse so the first child is visited next.
    for (let i = node.children.length - 1; i >= 0; i--) {
      pending.push(node.children[i]);
    }
  }
  return null;
}
2504
/**
 * Collect every node, in depth-first pre-order starting at `element` itself,
 * whose tag name equals `selector`.
 *
 * @param {object} element - Root node to search from (included in the search).
 * @param {string} selector - Tag name to look for.
 * @param {boolean} [caseSensitive=false] - Compare names case-sensitively.
 * @returns {object[]} Matching nodes in document order.
 */
function querySelectorAll3(element, selector, caseSensitive = false) {
  const wanted = caseSensitive ? selector : selector.toLowerCase();
  const ownName = caseSensitive ? element.tagName : element.tagName.toLowerCase();
  const selfMatch = ownName === wanted ? [element] : [];
  const descendantMatches = element.children.flatMap(
    (child) => querySelectorAll3(child, selector, caseSensitive)
  );
  return selfMatch.concat(descendantMatches);
}
2516
/**
 * Read a node's `text`, falling back to an empty string when the node is
 * null/undefined or its text is falsy.
 *
 * @param {object|null|undefined} element - Node to read from.
 * @returns {string} The node's text, or "".
 */
function getText2(element) {
  if (element == null) {
    return "";
  }
  const { text } = element;
  return text ? text : "";
}
2519
/**
 * Return the first direct child whose tag name equals `tagName`, compared
 * case-insensitively.
 *
 * @param {object} element - Parent node.
 * @param {string} tagName - Tag name to look for.
 * @returns {object|null} The first matching child, or null.
 */
function getChild(element, tagName) {
  const wanted = tagName.toLowerCase();
  for (const child of element.children) {
    if (child.tagName.toLowerCase() === wanted) {
      return child;
    }
  }
  return null;
}
2523
/**
 * Return all direct children whose tag name equals `tagName`, compared
 * case-insensitively.
 *
 * @param {object} element - Parent node.
 * @param {string} tagName - Tag name to look for.
 * @returns {object[]} Matching children in their original order.
 */
function getChildren(element, tagName) {
  const wanted = tagName.toLowerCase();
  const matches = [];
  for (const child of element.children) {
    if (child.tagName.toLowerCase() === wanted) {
      matches.push(child);
    }
  }
  return matches;
}
2527
+
2528
+ // src/feed/sitemap/parse.ts
2529
/**
 * Parse sitemap XML into a normalized result.
 *
 * Resolution order: a `sitemapindex` element, then a `urlset` element, then
 * any `url` elements found anywhere in the document. When none of these
 * exist, an empty urlset result is returned.
 *
 * @param {string} xml - Raw sitemap XML.
 * @param {string} [baseUrl] - Base URL forwarded to the extractors.
 * @returns {{ sitemap: { type: string, urls: object[], sitemaps: object[] }, isIndex: boolean }}
 */
function parseSitemap(xml, baseUrl) {
  const doc = parseSitemapXML(xml);

  const indexRoot = querySelector3(doc, "sitemapindex");
  if (indexRoot) {
    return parseSitemapIndex(indexRoot, baseUrl);
  }

  const urlsetRoot = querySelector3(doc, "urlset");
  if (urlsetRoot) {
    return parseUrlset(urlsetRoot, baseUrl);
  }

  // No recognised root: collect loose <url> elements. An empty match list
  // yields the same empty urlset the dedicated fallback would produce.
  const looseUrls = querySelectorAll3(doc, "url");
  return {
    sitemap: {
      type: "urlset",
      urls: looseUrls.map((url) => extractUrl(url, baseUrl)),
      sitemaps: []
    },
    isIndex: false
  };
}
2559
/**
 * Convert a `sitemapindex` element into a sitemap-index result.
 *
 * Each direct `sitemap` child contributes its `loc` and optional `lastmod`.
 * `loc` is entity-decoded, as `extractUrl` does for page URLs, so escaped
 * characters such as `&amp;` do not leak into the returned URL.
 *
 * @param {object} element - The `sitemapindex` node.
 * @param {string} [baseUrl] - When truthy, `loc` is passed through
 *   `normalizeUrlHttps(baseUrl, loc)`.
 * @returns {{ sitemap: { type: "sitemapindex", urls: [], sitemaps: object[] }, isIndex: true }}
 */
function parseSitemapIndex(element, baseUrl) {
  const sitemapElements = getChildren(element, "sitemap");
  const sitemaps = sitemapElements.map((el) => {
    const loc = decodeXmlEntities(getText2(getChild(el, "loc")));
    // Empty <lastmod> text is reported as undefined, not "".
    const lastmod = getText2(getChild(el, "lastmod")) || undefined;
    return {
      loc: baseUrl ? normalizeUrlHttps(baseUrl, loc) : loc,
      lastmod
    };
  });
  return {
    sitemap: {
      type: "sitemapindex",
      urls: [],
      sitemaps
    },
    isIndex: true
  };
}
2578
/**
 * Convert a `urlset` element into a urlset result by extracting every direct
 * `url` child.
 *
 * @param {object} element - The `urlset` node.
 * @param {string} [baseUrl] - Base URL forwarded to `extractUrl`.
 * @returns {{ sitemap: { type: "urlset", urls: object[], sitemaps: [] }, isIndex: false }}
 */
function parseUrlset(element, baseUrl) {
  const urls = [];
  for (const urlNode of getChildren(element, "url")) {
    urls.push(extractUrl(urlNode, baseUrl));
  }
  const sitemap = { type: "urlset", urls, sitemaps: [] };
  return { sitemap, isIndex: false };
}
2590
/**
 * Build a URL entry from a `url` element.
 *
 * Reads `loc` (entity-decoded), `lastmod`, `changefreq` and `priority`, then
 * attaches `news`, `images` and `videos` when the corresponding extractors
 * return data.
 *
 * @param {object} element - The `url` node.
 * @param {string} [baseUrl] - When truthy, `loc` is passed through
 *   `normalizeUrlHttps(baseUrl, loc)`.
 * @returns {object} Entry with `loc`, `lastmod`, `changefreq`, `priority`
 *   and, when present, `news`, `images`, `videos`.
 */
function extractUrl(element, baseUrl) {
  const rawLoc = getText2(getChild(element, "loc"));
  const loc = decodeXmlEntities(rawLoc);
  const lastmod = getText2(getChild(element, "lastmod")) || undefined;
  const changefreq = getText2(getChild(element, "changefreq")) || undefined;
  const priorityText = getText2(getChild(element, "priority"));
  const priority = priorityText ? Number.parseFloat(priorityText) : undefined;
  const result = {
    loc: baseUrl ? normalizeUrlHttps(baseUrl, loc) : loc,
    lastmod,
    changefreq,
    // Explicit undefined/NaN checks: a truthiness test would discard the
    // valid priority 0.0.
    priority: priority !== undefined && !Number.isNaN(priority) ? priority : undefined
  };
  const news = extractNews(element);
  if (news) {
    result.news = news;
  }
  const images = extractImages(element, baseUrl);
  if (images.length > 0) {
    result.images = images;
  }
  const videos = extractVideos(element, baseUrl);
  if (videos.length > 0) {
    result.videos = videos;
  }
  return result;
}
2617
/**
 * Extract the news extension data from a `url` element.
 *
 * Every lookup tries, in order: the `news:`-prefixed tag, the bare tag, then
 * any child whose tag name ends with `:<name>`. For text lookups the next
 * candidate is also tried when the previous one yields empty text.
 *
 * @param {object} urlElement - The `url` node.
 * @returns {object|undefined} News data (`publication`, `publicationDate`,
 *   `title`, `keywords`, `stockTickers` as available), or undefined when no
 *   news element exists or nothing was extracted from it.
 */
function extractNews(urlElement) {
  const hasSuffix = (name) => (c) => c.tagName.toLowerCase().endsWith(`:${name}`);
  const findNode = (parentNode, name) =>
    getChild(parentNode, `news:${name}`) ||
    getChild(parentNode, name) ||
    parentNode.children.find(hasSuffix(name));
  const readText = (parentNode, name) =>
    getText2(getChild(parentNode, `news:${name}`)) ||
    getText2(getChild(parentNode, name)) ||
    getText2(parentNode.children.find(hasSuffix(name)));
  const splitList = (value) => value.split(",").map((part) => part.trim());

  const newsEl = findNode(urlElement, "news");
  if (!newsEl) {
    return undefined;
  }

  const news = {};

  const pubEl = findNode(newsEl, "publication");
  if (pubEl) {
    const name = readText(pubEl, "name");
    const language = readText(pubEl, "language");
    if (name || language) {
      news.publication = {
        name: name || undefined,
        language: language || undefined
      };
    }
  }

  const pubDate = readText(newsEl, "publication_date");
  if (pubDate) {
    news.publicationDate = pubDate;
  }

  const title = readText(newsEl, "title");
  if (title) {
    news.title = decodeXmlEntities(title);
  }

  const keywords = readText(newsEl, "keywords");
  if (keywords) {
    news.keywords = splitList(keywords);
  }

  const stockTickers = readText(newsEl, "stock_tickers");
  if (stockTickers) {
    news.stockTickers = splitList(stockTickers);
  }

  return Object.keys(news).length > 0 ? news : undefined;
}
2652
/**
 * Extract image extension entries from a `url` element.
 *
 * Image elements are direct children named `image:image`, `image`, or
 * anything ending in `:image`. Field lookups try the `image:`-prefixed tag,
 * the bare tag, then any child ending in `:<name>`. Entries without a `loc`
 * are dropped.
 *
 * URL fields (`loc`, `license`) are entity-decoded before use, as
 * `extractUrl` does for page URLs, so `&amp;` in a query string does not end
 * up in the returned URL.
 *
 * @param {object} urlElement - The `url` node.
 * @param {string} [baseUrl] - When truthy, URL fields are passed through
 *   `normalizeUrlHttps(baseUrl, value)`.
 * @returns {object[]} Image entries with `loc` and optional `caption`,
 *   `geoLocation`, `title`, `license`.
 */
function extractImages(urlElement, baseUrl) {
  const readText = (parentNode, name) =>
    getText2(getChild(parentNode, `image:${name}`)) ||
    getText2(getChild(parentNode, name)) ||
    getText2(parentNode.children.find((c) => c.tagName.toLowerCase().endsWith(`:${name}`)));
  // Decode entities, then resolve against the base URL when one is given.
  const resolveUrl = (raw) => {
    const decoded = decodeXmlEntities(raw);
    return baseUrl ? normalizeUrlHttps(baseUrl, decoded) : decoded;
  };

  const imageElements = urlElement.children.filter(
    (c) => c.tagName.toLowerCase() === "image:image" || c.tagName.toLowerCase() === "image" || c.tagName.toLowerCase().endsWith(":image")
  );
  return imageElements.map((imgEl) => {
    const loc = readText(imgEl, "loc");
    if (!loc) return null;
    const image = {
      loc: resolveUrl(loc)
    };
    const caption = readText(imgEl, "caption");
    if (caption) image.caption = decodeXmlEntities(caption);
    const geoLocation = readText(imgEl, "geo_location");
    if (geoLocation) image.geoLocation = geoLocation;
    const title = readText(imgEl, "title");
    if (title) image.title = decodeXmlEntities(title);
    const license = readText(imgEl, "license");
    if (license) image.license = resolveUrl(license);
    return image;
  }).filter((img) => img !== null);
}
2673
/**
 * Extract video extension entries from a `url` element.
 *
 * Video elements are direct children named `video:video`, `video`, or
 * anything ending in `:video`. Field lookups try the `video:`-prefixed tag,
 * the bare tag, then any child ending in `:<name>`. Entries lacking any of
 * `thumbnail_loc`, `title` or `description` are dropped.
 *
 * URL fields (`thumbnail_loc`, `content_loc`, `player_loc`) are
 * entity-decoded before use, as `extractUrl` does for page URLs.
 *
 * @param {object} urlElement - The `url` node.
 * @param {string} [baseUrl] - When truthy, URL fields are passed through
 *   `normalizeUrlHttps(baseUrl, value)`.
 * @returns {object[]} Video entries with `thumbnailLoc`, `title`,
 *   `description` and whichever optional fields were present.
 */
function extractVideos(urlElement, baseUrl) {
  const readText = (parentNode, name) =>
    getText2(getChild(parentNode, `video:${name}`)) ||
    getText2(getChild(parentNode, name)) ||
    getText2(parentNode.children.find((c) => c.tagName.toLowerCase().endsWith(`:${name}`)));
  // Decode entities, then resolve against the base URL when one is given.
  const resolveUrl = (raw) => {
    const decoded = decodeXmlEntities(raw);
    return baseUrl ? normalizeUrlHttps(baseUrl, decoded) : decoded;
  };

  const videoElements = urlElement.children.filter(
    (c) => c.tagName.toLowerCase() === "video:video" || c.tagName.toLowerCase() === "video" || c.tagName.toLowerCase().endsWith(":video")
  );
  return videoElements.map((vidEl) => {
    const thumbnailLoc = readText(vidEl, "thumbnail_loc");
    const title = readText(vidEl, "title");
    const description = readText(vidEl, "description");
    // These three fields are mandatory for an entry to be kept.
    if (!thumbnailLoc || !title || !description) return null;
    const video = {
      thumbnailLoc: resolveUrl(thumbnailLoc),
      title: decodeXmlEntities(title),
      description: decodeXmlEntities(description)
    };
    const contentLoc = readText(vidEl, "content_loc");
    if (contentLoc) video.contentLoc = resolveUrl(contentLoc);
    const playerLoc = readText(vidEl, "player_loc");
    if (playerLoc) video.playerLoc = resolveUrl(playerLoc);
    const duration = readText(vidEl, "duration");
    if (duration) {
      const dur = Number.parseInt(duration, 10);
      if (!Number.isNaN(dur)) video.duration = dur;
    }
    const rating = readText(vidEl, "rating");
    if (rating) {
      const r = Number.parseFloat(rating);
      if (!Number.isNaN(r)) video.rating = r;
    }
    const viewCount = readText(vidEl, "view_count");
    if (viewCount) {
      const vc = Number.parseInt(viewCount, 10);
      if (!Number.isNaN(vc)) video.viewCount = vc;
    }
    const publicationDate = readText(vidEl, "publication_date");
    if (publicationDate) video.publicationDate = publicationDate;
    const familyFriendly = readText(vidEl, "family_friendly");
    if (familyFriendly) {
      // Any value other than "yes" (case-insensitive) is treated as false.
      video.familyFriendly = familyFriendly.toLowerCase() === "yes";
    }
    const category = readText(vidEl, "category");
    if (category) video.category = category;
    const tagElements = vidEl.children.filter(
      (c) => c.tagName.toLowerCase() === "video:tag" || c.tagName.toLowerCase() === "tag" || c.tagName.toLowerCase().endsWith(":tag")
    );
    if (tagElements.length > 0) {
      video.tags = tagElements.map((t) => getText2(t)).filter(Boolean);
    }
    return video;
  }).filter((vid) => vid !== null);
}
2724
/**
 * Decode the five predefined XML entities and numeric character references.
 *
 * Decoding happens in a single pass, so the output of one replacement is
 * never decoded again (`&amp;quot;` becomes `&quot;`, not `"`). Numeric
 * references use full code points, so characters above U+FFFF decode
 * correctly; references beyond U+10FFFF are left unchanged.
 *
 * @param {string} text - Text containing entity references.
 * @returns {string} The decoded text.
 */
function decodeXmlEntities(text) {
  const namedEntities = { lt: "<", gt: ">", amp: "&", quot: '"', apos: "'" };
  const entityPattern = /&(?:(lt|gt|amp|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g;
  return text.replace(entityPattern, (match, named, decimal, hex) => {
    if (named !== undefined) {
      return namedEntities[named];
    }
    const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
    // String.fromCodePoint throws for values past the Unicode range.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
2727
+
2356
2728
  // src/pluck/types.ts
2357
2729
  var PluckError = class extends Error {
2358
2730
  constructor(message) {
@@ -2781,7 +3153,7 @@ function extractOpenGraph(doc) {
2781
3153
  if (Object.keys(audio).length > 0) {
2782
3154
  metadata.audio = audio;
2783
3155
  }
2784
- const images = extractImages(doc);
3156
+ const images = extractImages2(doc);
2785
3157
  if (images.length > 0) {
2786
3158
  metadata.images = images;
2787
3159
  }
@@ -2850,7 +3222,7 @@ function extractAudio(doc) {
2850
3222
  Object.entries(audio).filter(([_, value]) => value !== void 0)
2851
3223
  );
2852
3224
  }
2853
- function extractImages(doc) {
3225
+ function extractImages2(doc) {
2854
3226
  const images = [];
2855
3227
  const imageUrls = getAllMetaPropertyValues(doc, "og:image");
2856
3228
  const imageSecureUrls = getAllMetaPropertyValues(doc, "og:image:secure_url");
@@ -3944,9 +4316,63 @@ async function gatherFeed(url) {
3944
4316
  }
3945
4317
  const response = await pluck(feedUrl);
3946
4318
  const content = await response.textUtf8();
4319
+ const format = detectFormat(content);
4320
+ if (format === "sitemap") {
4321
+ return normalizeSitemapToFeed(content, response.finalUrl);
4322
+ }
3947
4323
  const result = parseFeed(content, response.finalUrl);
3948
4324
  return result.feed;
3949
4325
  }
4326
/**
 * Parse sitemap content and reshape it into a feed-like object.
 *
 * A sitemap index becomes a feed titled "Sitemap Index" with one item per
 * child sitemap. A urlset becomes a feed with one item per URL, enriched
 * from news data (title, published, authors, tags) and the first image when
 * present; its title is "<hostname> Sitemap", or "Sitemap" when `baseUrl`
 * cannot be parsed as a URL.
 *
 * @param {string} content - Raw sitemap XML.
 * @param {string} baseUrl - URL the sitemap was fetched from.
 * @returns {{ format: "sitemap", title: string, url: string, items: object[] }}
 */
function normalizeSitemapToFeed(content, baseUrl) {
  const { isIndex, sitemap } = parseSitemap(content, baseUrl);

  if (isIndex) {
    const indexItems = sitemap.sitemaps.map((entry, index) => ({
      id: entry.loc || `sitemap-${index}`,
      url: entry.loc,
      title: `Sitemap: ${entry.loc}`,
      modified: entry.lastmod
    }));
    return {
      format: "sitemap",
      title: "Sitemap Index",
      url: baseUrl,
      items: indexItems
    };
  }

  const toFeedItem = (entry, index) => {
    const item = {
      id: entry.loc || `url-${index}`,
      url: entry.loc,
      modified: entry.lastmod
    };
    const { news, images } = entry;
    if (news) {
      // Assigned even when undefined, so the keys exist on news-backed items.
      item.title = news.title;
      item.published = news.publicationDate;
      if (news.publication?.name) {
        item.authors = [{ name: news.publication.name }];
      }
      if (news.keywords) {
        item.tags = news.keywords;
      }
    }
    if (images && images.length > 0) {
      item.image = images[0].loc;
    }
    return item;
  };
  const items = sitemap.urls.map(toFeedItem);

  let title;
  try {
    title = `${new URL(baseUrl).hostname} Sitemap`;
  } catch {
    // baseUrl is not a parseable URL: use the generic title.
    title = "Sitemap";
  }

  return {
    format: "sitemap",
    title,
    url: baseUrl,
    items
  };
}
3950
4376
 
3951
4377
  // src/metadata/feed-discovery/heuristics.ts
3952
4378
  var COMMON_FEED_PATHS = [
@@ -4253,7 +4679,7 @@ function extractAnalytics(doc) {
4253
4679
  function extractAssets(doc, baseUrl) {
4254
4680
  const metadata = {};
4255
4681
  const effectiveBaseUrl = getEffectiveBaseUrl2(doc, baseUrl);
4256
- const images = extractImages2(doc, effectiveBaseUrl);
4682
+ const images = extractImages3(doc, effectiveBaseUrl);
4257
4683
  if (images.length > 0) {
4258
4684
  metadata.images = images;
4259
4685
  }
@@ -4307,7 +4733,7 @@ function getEffectiveBaseUrl2(doc, baseUrl) {
4307
4733
  }
4308
4734
  return null;
4309
4735
  }
4310
- function extractImages2(doc, baseUrl) {
4736
+ function extractImages3(doc, baseUrl) {
4311
4737
  const urls = /* @__PURE__ */ new Set();
4312
4738
  const imgElements = doc.querySelectorAll("img[src]");
4313
4739
  for (const img of Array.from(imgElements)) {
@@ -4725,7 +5151,7 @@ function extractMonetization(doc) {
4725
5151
  }
4726
5152
 
4727
5153
  // src/metadata/news/extract.ts
4728
- function extractNews(doc) {
5154
+ function extractNews2(doc) {
4729
5155
  const metadata = {};
4730
5156
  const newsKeywords = getMetaContent(doc, "news_keywords");
4731
5157
  if (newsKeywords) {
@@ -6969,7 +7395,7 @@ exports.extractIcons = extractIcons;
6969
7395
  exports.extractLanguage = extractLanguage;
6970
7396
  exports.extractLinks = extractLinks3;
6971
7397
  exports.extractMonetization = extractMonetization;
6972
- exports.extractNews = extractNews;
7398
+ exports.extractNews = extractNews2;
6973
7399
  exports.extractOpenGraph = extractOpenGraph;
6974
7400
  exports.extractPagination = extractPagination;
6975
7401
  exports.extractRobots = extractRobots;