aeorank 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +42 -0
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +42 -0
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +42 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +42 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/browser.js
CHANGED
|
@@ -2655,6 +2655,38 @@ function extractNavLinks(html, domain) {
|
|
|
2655
2655
|
}
|
|
2656
2656
|
return Array.from(paths);
|
|
2657
2657
|
}
|
|
2658
|
+
/**
 * Extracts unique same-site page paths from the double-quoted `href`
 * attributes found in an HTML string.
 *
 * Root-relative hrefs ("/about") are taken as-is. Absolute ("https://...")
 * and protocol-relative ("//host/...") hrefs are kept only when their
 * hostname matches `domain` (ignoring case and a leading "www."), and are
 * reduced to their pathname. Hrefs containing a "#" are not matched by the
 * attribute pattern at all, and every other href form (relative paths,
 * "mailto:", "tel:", ...) is ignored.
 *
 * Trailing slashes are trimmed; the site root, root-relative hrefs with a
 * query string, static-asset extensions and a fixed list of non-content
 * path prefixes (api, admin, auth, cart, ...) are skipped.
 *
 * @param {string} html - Raw HTML to scan.
 * @param {string} domain - Site hostname, with or without a leading "www.".
 * @param {number} [limit=30] - Maximum number of paths to return.
 * @returns {string[]} Unique paths, shallowest first, then by localeCompare.
 */
function extractAllInternalLinks(html, domain, limit = 30) {
  // Lowercase before stripping so an upper-case "WWW." prefix is removed too.
  const cleanDomain = domain.toLowerCase().replace(/^www\./, "");
  const paths = /* @__PURE__ */ new Set();
  for (const [, href] of html.matchAll(/href="([^"#]*)"/gi)) {
    if (!href) continue;
    let path;
    if (href.startsWith("//") || href.startsWith("http")) {
      // "//host/path" is a protocol-relative URL, not a site path: resolve it
      // like an absolute URL so the hostname check below applies to it.
      let url;
      try {
        url = new URL(href.startsWith("//") ? `https:${href}` : href);
      } catch {
        continue;
      }
      // Only http(s) URLs have a "/"-rooted pathname that is a page path.
      if (url.protocol !== "http:" && url.protocol !== "https:") continue;
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname;
    } else if (href.startsWith("/")) {
      path = href;
    } else {
      continue;
    }
    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    // Only root-relative hrefs can still carry a query string here;
    // URL.pathname never includes one.
    if (path.includes("?")) continue;
    if (/\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|zip|woff|woff2|ttf|eot|mp4|webm|mp3)$/i.test(path)) continue;
    if (/^\/(api|wp-admin|wp-includes|wp-json|static|assets|_next|auth|login|signup|sign-up|register|cart|checkout|account|admin|cdn-cgi|feed|rss)\b/i.test(path)) continue;
    paths.add(path);
  }
  // Order by number of "/"-separated segments, then by localeCompare.
  return Array.from(paths)
    .sort((a, b) => a.split("/").length - b.split("/").length || a.localeCompare(b))
    .slice(0, limit);
}
|
|
2658
2690
|
function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
|
|
2659
2691
|
const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
|
|
2660
2692
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -2722,6 +2754,16 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
2722
2754
|
if (!existingUrls.has(url)) urlsToFetch.set(url, "content");
|
|
2723
2755
|
}
|
|
2724
2756
|
}
|
|
2757
|
+
const hasBlogSample = (siteData.blogSample?.length ?? 0) > 3;
|
|
2758
|
+
if (!hasBlogSample) {
|
|
2759
|
+
const allPaths = extractAllInternalLinks(siteData.homepage.text, siteData.domain, 30);
|
|
2760
|
+
for (const path of allPaths) {
|
|
2761
|
+
const url = `${baseUrl}${path}`;
|
|
2762
|
+
if (!existingUrls.has(url) && !urlsToFetch.has(url)) {
|
|
2763
|
+
urlsToFetch.set(url, "content");
|
|
2764
|
+
}
|
|
2765
|
+
}
|
|
2766
|
+
}
|
|
2725
2767
|
const entries = Array.from(urlsToFetch.entries());
|
|
2726
2768
|
if (entries.length === 0) return 0;
|
|
2727
2769
|
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|