aeorank 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -2655,6 +2655,38 @@ function extractNavLinks(html, domain) {
2655
2655
  }
2656
2656
  return Array.from(paths);
2657
2657
  }
2658
/**
 * Collect the same-site page paths linked from an HTML document.
 *
 * Only double-quoted `href` values without a `#` fragment are considered.
 * Root-relative hrefs (`/about`) are taken as-is; absolute (`http...`) and
 * protocol-relative (`//host/...`) hrefs are kept only when their host equals
 * `domain` (compared case-insensitively, ignoring a leading `www.`).
 * Trailing slashes are stripped, and the root path, paths with a query
 * string, static-asset extensions, and app/system prefixes (api, admin,
 * login, cart, ...) are dropped.
 *
 * @param {string} html - Raw HTML to scan.
 * @param {string} domain - Site host name, with or without a leading `www.`.
 * @param {number} [limit=30] - Maximum number of paths to return.
 * @returns {string[]} Unique paths, shallowest first, then alphabetical.
 */
function extractAllInternalLinks(html, domain, limit = 30) {
  // Lowercase before stripping so that "WWW.Example.com" also normalizes.
  const cleanDomain = domain.toLowerCase().replace(/^www\./, "");
  const assetExtension = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|zip|woff|woff2|ttf|eot|mp4|webm|mp3)$/i;
  const excludedPrefix = /^\/(api|wp-admin|wp-includes|wp-json|static|assets|_next|auth|login|signup|sign-up|register|cart|checkout|account|admin|cdn-cgi|feed|rss)\b/i;
  const paths = new Set();
  for (const [, href] of html.matchAll(/href="([^"#]*)"/gi)) {
    const isProtocolRelative = href.startsWith("//");
    let path;
    if (isProtocolRelative || href.startsWith("http")) {
      // Protocol-relative hrefs name a host, so they must go through the same
      // host check as absolute URLs instead of being treated as a local path.
      let url;
      try {
        url = new URL(isProtocolRelative ? `https:${href}` : href);
      } catch {
        continue; // Not a parseable URL; ignore this link.
      }
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname;
    } else if (href.startsWith("/")) {
      path = href;
    } else {
      // Relative paths, mailto:, tel:, javascript:, empty hrefs, etc.
      continue;
    }
    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    if (path.includes("?")) continue;
    if (assetExtension.test(path)) continue;
    if (excludedPrefix.test(path)) continue;
    paths.add(path);
  }
  const depth = (p) => p.split("/").length;
  return Array.from(paths)
    .sort((a, b) => depth(a) - depth(b) || a.localeCompare(b))
    .slice(0, limit);
}
2658
2690
  function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
2659
2691
  const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
2660
2692
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
@@ -2722,6 +2754,16 @@ async function fetchMultiPageData(siteData, options) {
2722
2754
  if (!existingUrls.has(url)) urlsToFetch.set(url, "content");
2723
2755
  }
2724
2756
  }
2757
+ const hasBlogSample = (siteData.blogSample?.length ?? 0) > 3;
2758
+ if (!hasBlogSample) {
2759
+ const allPaths = extractAllInternalLinks(siteData.homepage.text, siteData.domain, 30);
2760
+ for (const path of allPaths) {
2761
+ const url = `${baseUrl}${path}`;
2762
+ if (!existingUrls.has(url) && !urlsToFetch.has(url)) {
2763
+ urlsToFetch.set(url, "content");
2764
+ }
2765
+ }
2766
+ }
2725
2767
  const entries = Array.from(urlsToFetch.entries());
2726
2768
  if (entries.length === 0) return 0;
2727
2769
  const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));