aeorank 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2737,6 +2737,38 @@ function extractNavLinks(html, domain) {
2737
2737
  }
2738
2738
  return Array.from(paths);
2739
2739
  }
2740
/**
 * Collect the paths of same-site pages linked from an HTML document.
 *
 * Only double-quoted `href="..."` attributes without a `#` are considered.
 * A link is kept when it is root-relative (`/path`) or an absolute /
 * protocol-relative URL whose hostname (ignoring a leading `www.` and case)
 * equals `domain`. Trailing slashes are stripped, and the site root, paths
 * with a query string, static-asset extensions, and well-known non-content
 * prefixes (api, admin, auth, cart, feeds, ...) are discarded.
 *
 * @param {string} html - Raw HTML to scan for links.
 * @param {string} domain - Site hostname; a leading `www.` is ignored.
 * @param {number} [limit=30] - Maximum number of paths to return.
 * @returns {string[]} Unique paths, shallowest first (by `/` segment count),
 *   then ordered with `localeCompare`, truncated to `limit` entries.
 */
function extractAllInternalLinks(html, domain, limit = 30) {
  const assetExtension = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|zip|woff|woff2|ttf|eot|mp4|webm|mp3)$/i;
  const nonContentPrefix = /^\/(api|wp-admin|wp-includes|wp-json|static|assets|_next|auth|login|signup|sign-up|register|cart|checkout|account|admin|cdn-cgi|feed|rss)\b/i;
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  const paths = new Set();

  for (const [, href] of html.matchAll(/href="([^"#]*)"/gi)) {
    if (!href) continue;

    let path;
    const isProtocolRelative = href.startsWith("//");
    if (isProtocolRelative || href.startsWith("http")) {
      // `//host/path` names another origin, not a local path, so it must go
      // through the same hostname check as a fully qualified URL.
      let url;
      try {
        url = new URL(isProtocolRelative ? `https:${href}` : href);
      } catch {
        // Unparseable href: not a link we can follow.
        continue;
      }
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname;
    } else if (href.startsWith("/")) {
      path = href;
    } else {
      // Document-relative links and other schemes (mailto:, tel:, ...) are ignored.
      continue;
    }

    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    // Only root-relative hrefs can still carry a query here; URL.pathname never does.
    if (path.includes("?")) continue;
    if (assetExtension.test(path)) continue;
    if (nonContentPrefix.test(path)) continue;
    paths.add(path);
  }

  return Array.from(paths)
    .sort((a, b) => a.split("/").length - b.split("/").length || a.localeCompare(b))
    .slice(0, limit);
}
2740
2772
  function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
2741
2773
  const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
2742
2774
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
@@ -2804,6 +2836,16 @@ async function fetchMultiPageData(siteData, options) {
2804
2836
  if (!existingUrls.has(url)) urlsToFetch.set(url, "content");
2805
2837
  }
2806
2838
  }
2839
+ const hasBlogSample = (siteData.blogSample?.length ?? 0) > 3;
2840
+ if (!hasBlogSample) {
2841
+ const allPaths = extractAllInternalLinks(siteData.homepage.text, siteData.domain, 30);
2842
+ for (const path of allPaths) {
2843
+ const url = `${baseUrl}${path}`;
2844
+ if (!existingUrls.has(url) && !urlsToFetch.has(url)) {
2845
+ urlsToFetch.set(url, "content");
2846
+ }
2847
+ }
2848
+ }
2807
2849
  const entries = Array.from(urlsToFetch.entries());
2808
2850
  if (entries.length === 0) return 0;
2809
2851
  const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));