aeorank 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -3113,6 +3113,38 @@ function extractNavLinks(html, domain) {
3113
3113
  }
3114
3114
  return Array.from(paths);
3115
3115
  }
3116
/**
 * Collects the distinct same-site page paths referenced by double-quoted
 * `href` attributes in an HTML string.
 *
 * Accepted link forms:
 *   - root-relative paths (`/about`)
 *   - absolute URLs starting with `http` whose host equals `domain`
 *   - protocol-relative URLs (`//host/path`) whose host equals `domain`
 * A leading `www.` is ignored on both sides of the host comparison.
 *
 * Skipped: the site root, hrefs containing a fragment, root-relative hrefs
 * containing a query string, static-asset file extensions, and
 * application/infrastructure prefixes (api, admin, auth, cart, feed, ...).
 *
 * @param {string} html - Raw HTML to scan.
 * @param {string} domain - Site host name, with or without a leading `www.`.
 * @param {number} [limit=30] - Maximum number of paths to return.
 * @returns {string[]} Unique paths without trailing slashes, ordered
 *   shallowest first and then by `localeCompare`, truncated to `limit`.
 */
function extractAllInternalLinks(html, domain, limit = 30) {
  // Lower-case before stripping so that "WWW.Example.com" is normalised too.
  const cleanDomain = domain.toLowerCase().replace(/^www\./, "");
  const paths = new Set();
  // The capture group excludes `#`, so any href carrying a fragment never matches.
  for (const [, href] of html.matchAll(/href="([^"#]*)"/gi)) {
    if (!href) continue;
    let path;
    const isProtocolRelative = href.startsWith("//");
    if (isProtocolRelative || href.startsWith("http")) {
      // `//host/path` names another authority, not a local path; give it a
      // scheme so it goes through the same host check as absolute URLs.
      let url;
      try {
        url = new URL(isProtocolRelative ? `https:${href}` : href);
      } catch {
        continue; // unparsable URL: ignore this link
      }
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname; // pathname carries neither query nor fragment
    } else if (href.startsWith("/")) {
      path = href;
    } else {
      continue; // document-relative links, mailto:, tel:, javascript:, ...
    }
    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    // Only root-relative hrefs can still carry a query string at this point.
    if (path.includes("?")) continue;
    if (/\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|zip|woff|woff2|ttf|eot|mp4|webm|mp3)$/i.test(path)) continue;
    if (/^\/(api|wp-admin|wp-includes|wp-json|static|assets|_next|auth|login|signup|sign-up|register|cart|checkout|account|admin|cdn-cgi|feed|rss)\b/i.test(path)) continue;
    paths.add(path);
  }
  return Array.from(paths)
    .sort((a, b) => a.split("/").length - b.split("/").length || a.localeCompare(b))
    .slice(0, limit);
}
3116
3148
  function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
3117
3149
  const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
3118
3150
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
@@ -3180,6 +3212,16 @@ async function fetchMultiPageData(siteData, options) {
3180
3212
  if (!existingUrls.has(url)) urlsToFetch.set(url, "content");
3181
3213
  }
3182
3214
  }
3215
+ const hasBlogSample = (siteData.blogSample?.length ?? 0) > 3;
3216
+ if (!hasBlogSample) {
3217
+ const allPaths = extractAllInternalLinks(siteData.homepage.text, siteData.domain, 30);
3218
+ for (const path of allPaths) {
3219
+ const url = `${baseUrl}${path}`;
3220
+ if (!existingUrls.has(url) && !urlsToFetch.has(url)) {
3221
+ urlsToFetch.set(url, "content");
3222
+ }
3223
+ }
3224
+ }
3183
3225
  const entries = Array.from(urlsToFetch.entries());
3184
3226
  if (entries.length === 0) return 0;
3185
3227
  const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));