aeorank 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +42 -0
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +42 -0
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +42 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +42 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2737,6 +2737,38 @@ function extractNavLinks(html, domain) {
|
|
|
2737
2737
|
}
|
|
2738
2738
|
return Array.from(paths);
|
|
2739
2739
|
}
|
|
2740
|
+
/**
 * Collect unique same-site page paths from the double-quoted `href="..."`
 * attributes found in an HTML string.
 *
 * Only three href shapes are considered; everything else (relative paths,
 * `mailto:`, `tel:`, `javascript:` ...) is ignored:
 *   - root-relative (`/about`)          -> used as-is
 *   - absolute (`http...://host/path`)  -> kept only when host matches `domain`
 *   - protocol-relative (`//host/path`) -> kept only when host matches `domain`
 *
 * Hrefs containing a `#` are not matched by the attribute pattern at all.
 * The site root, paths with a query string, static-asset extensions and a
 * fixed list of non-content prefixes (api, admin, auth, cart, ...) are dropped.
 *
 * @param {string} html - Raw HTML to scan.
 * @param {string} domain - Site hostname; a leading `www.` and letter case are ignored.
 * @param {number} [limit=30] - Maximum number of paths to return.
 * @returns {string[]} Paths without trailing slashes, shallowest first and
 *   alphabetical within the same depth, truncated to `limit`.
 */
function extractAllInternalLinks(html, domain, limit = 30) {
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  const assetExtension = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|zip|woff|woff2|ttf|eot|mp4|webm|mp3)$/i;
  const nonContentPrefix = /^\/(api|wp-admin|wp-includes|wp-json|static|assets|_next|auth|login|signup|sign-up|register|cart|checkout|account|admin|cdn-cgi|feed|rss)\b/i;
  const paths = /* @__PURE__ */ new Set();

  // Single pass over every href attribute; the capture group is the URL text.
  for (const [, href] of html.matchAll(/href="([^"#]*)"/gi)) {
    if (!href) continue;

    let path;
    const isProtocolRelative = href.startsWith("//");
    if (isProtocolRelative || href.startsWith("http")) {
      // `//host/path` names a host, not a path on this site, so it must go
      // through the same hostname check as a fully-qualified URL.
      let url;
      try {
        url = new URL(isProtocolRelative ? `https:${href}` : href);
      } catch {
        continue; // Unparseable URL: best-effort extraction, just skip it.
      }
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname;
    } else if (href.startsWith("/")) {
      path = href;
    } else {
      continue;
    }

    // Normalise trailing slashes so "/about/" and "/about" dedupe together.
    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    if (path.includes("#") || path.includes("?")) continue;
    if (assetExtension.test(path)) continue;
    if (nonContentPrefix.test(path)) continue;
    paths.add(path);
  }

  // Shallow paths first (fewer "/" segments), then alphabetical.
  return Array.from(paths)
    .sort((a, b) => a.split("/").length - b.split("/").length || a.localeCompare(b))
    .slice(0, limit);
}
|
|
2740
2772
|
function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
|
|
2741
2773
|
const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
|
|
2742
2774
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -2804,6 +2836,16 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
2804
2836
|
if (!existingUrls.has(url)) urlsToFetch.set(url, "content");
|
|
2805
2837
|
}
|
|
2806
2838
|
}
|
|
2839
|
+
const hasBlogSample = (siteData.blogSample?.length ?? 0) > 3;
|
|
2840
|
+
if (!hasBlogSample) {
|
|
2841
|
+
const allPaths = extractAllInternalLinks(siteData.homepage.text, siteData.domain, 30);
|
|
2842
|
+
for (const path of allPaths) {
|
|
2843
|
+
const url = `${baseUrl}${path}`;
|
|
2844
|
+
if (!existingUrls.has(url) && !urlsToFetch.has(url)) {
|
|
2845
|
+
urlsToFetch.set(url, "content");
|
|
2846
|
+
}
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2807
2849
|
const entries = Array.from(urlsToFetch.entries());
|
|
2808
2850
|
if (entries.length === 0) return 0;
|
|
2809
2851
|
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|