aeorank 1.6.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -39
- package/dist/browser.d.ts +2 -2
- package/dist/browser.js +500 -125
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-3IJISYWT.js → chunk-PKJIKMLV.js} +2 -2
- package/dist/chunk-PKJIKMLV.js.map +1 -0
- package/dist/cli.js +415 -96
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-F7J2HRL4.js → full-site-crawler-FQYO46YV.js} +2 -2
- package/dist/full-site-crawler-FQYO46YV.js.map +1 -0
- package/dist/{full-site-crawler-VFARFR2C.js → full-site-crawler-UIOMKOZA.js} +2 -2
- package/dist/index.cjs +499 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +500 -125
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-3IJISYWT.js.map +0 -1
- package/dist/full-site-crawler-F7J2HRL4.js.map +0 -1
- /package/dist/{full-site-crawler-VFARFR2C.js.map → full-site-crawler-UIOMKOZA.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -193,7 +193,7 @@ async function prefetchSiteData(domain) {
|
|
|
193
193
|
sitemapForBlog = subSitemap.text;
|
|
194
194
|
}
|
|
195
195
|
}
|
|
196
|
-
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain,
|
|
196
|
+
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
197
197
|
if (blogUrls.length > 0) {
|
|
198
198
|
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
199
199
|
blogSample = fetched.filter(
|
|
@@ -550,15 +550,17 @@ function checkOriginalData(data) {
|
|
|
550
550
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
551
551
|
return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
552
552
|
}
|
|
553
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
553
554
|
const html = data.homepage.text;
|
|
554
|
-
const
|
|
555
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
556
|
+
const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
|
|
555
557
|
let score = 0;
|
|
556
558
|
const statPatterns = /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i;
|
|
557
|
-
if (statPatterns.test(
|
|
559
|
+
if (statPatterns.test(allText)) {
|
|
558
560
|
const researchContext = /\b(our\s+(?:study|analysis|research|data|survey|findings|report)|we\s+(?:surveyed|analyzed|studied|measured|tracked)|proprietary|methodology|original\s+research)\b/i;
|
|
559
|
-
if (researchContext.test(
|
|
561
|
+
if (researchContext.test(allText)) {
|
|
560
562
|
score += 3;
|
|
561
|
-
findings.push({ severity: "info", detail: "Proprietary statistics with research context found
|
|
563
|
+
findings.push({ severity: "info", detail: "Proprietary statistics with research context found" });
|
|
562
564
|
} else {
|
|
563
565
|
score += 1;
|
|
564
566
|
findings.push({ severity: "low", detail: 'Statistics found but without research context (e.g., "500+ clients")', fix: 'Add context about your methodology: "Our analysis of X found..." or "We surveyed Y..."' });
|
|
@@ -1073,20 +1075,24 @@ function checkFactDensity(data) {
|
|
|
1073
1075
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
1074
1076
|
return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
1075
1077
|
}
|
|
1076
|
-
const
|
|
1078
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
1079
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
1080
|
+
const text = allText;
|
|
1081
|
+
const pageCount = allPages.length;
|
|
1077
1082
|
let score = 0;
|
|
1078
1083
|
const dataPoints = text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD|\s*EUR)/g) || [];
|
|
1079
1084
|
const countPhrases = text.match(/\d+(?:,\d{3})*\+?\s+(?:users?|clients?|customers?|companies|businesses|patients?|members?|employees?|projects?|downloads?)/gi) || [];
|
|
1080
1085
|
const totalDataPoints = dataPoints.length + countPhrases.length;
|
|
1081
|
-
|
|
1086
|
+
const avgPerPage = pageCount > 0 ? totalDataPoints / pageCount : 0;
|
|
1087
|
+
if (avgPerPage >= 4) {
|
|
1082
1088
|
score += 5;
|
|
1083
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found
|
|
1084
|
-
} else if (
|
|
1089
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages (avg ${avgPerPage.toFixed(1)}/page)` });
|
|
1090
|
+
} else if (avgPerPage >= 2) {
|
|
1085
1091
|
score += 3;
|
|
1086
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found` });
|
|
1092
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages` });
|
|
1087
1093
|
} else if (totalDataPoints >= 1) {
|
|
1088
1094
|
score += 1;
|
|
1089
|
-
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1095
|
+
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found across ${pageCount} pages`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1090
1096
|
} else {
|
|
1091
1097
|
findings.push({ severity: "high", detail: "No quantitative data points found", fix: "Add specific statistics (percentages, counts, comparisons) that AI engines can cite" });
|
|
1092
1098
|
}
|
|
@@ -1192,9 +1198,9 @@ function countRecentSitemapDates(sitemapText) {
|
|
|
1192
1198
|
distinctRecentDays: recentDays.size
|
|
1193
1199
|
};
|
|
1194
1200
|
}
|
|
1195
|
-
var BLOG_PATH_PATTERNS = /\/(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1201
|
+
var BLOG_PATH_PATTERNS = /\/(?:[^/]*-?)?(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1196
1202
|
var EXCLUDE_PATH_PATTERNS = /\/(?:tag|category|author|page|feed|wp-content|wp-admin|wp-json|cart|checkout|login|search|api|static|assets|_next)\b/i;
|
|
1197
|
-
function extractBlogUrlsFromSitemap(sitemapText, domain, limit =
|
|
1203
|
+
function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
1198
1204
|
const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
|
|
1199
1205
|
const candidates = [];
|
|
1200
1206
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -1490,7 +1496,7 @@ function jaccardSimilarity(a, b) {
|
|
|
1490
1496
|
const union = a.size + b.size - intersection;
|
|
1491
1497
|
return union === 0 ? 0 : intersection / union;
|
|
1492
1498
|
}
|
|
1493
|
-
function checkContentCannibalization(data) {
|
|
1499
|
+
function checkContentCannibalization(data, topicCoherenceScore) {
|
|
1494
1500
|
const findings = [];
|
|
1495
1501
|
if (!data.homepage) {
|
|
1496
1502
|
findings.push({ severity: "critical", detail: "No homepage available for cannibalization analysis" });
|
|
@@ -1500,7 +1506,7 @@ function checkContentCannibalization(data) {
|
|
|
1500
1506
|
{ html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` }
|
|
1501
1507
|
];
|
|
1502
1508
|
if (data.blogSample) {
|
|
1503
|
-
for (const page of data.blogSample
|
|
1509
|
+
for (const page of data.blogSample) {
|
|
1504
1510
|
pages.push({ html: page.text, url: page.finalUrl || "" });
|
|
1505
1511
|
}
|
|
1506
1512
|
}
|
|
@@ -1510,10 +1516,29 @@ function checkContentCannibalization(data) {
|
|
|
1510
1516
|
}
|
|
1511
1517
|
const pageTitles = pages.map((p) => ({ title: extractPageTitle(p.html), url: p.url }));
|
|
1512
1518
|
const wordSets = pageTitles.map((p) => titleToWordSet(p.title));
|
|
1519
|
+
const termPageCount = /* @__PURE__ */ new Map();
|
|
1520
|
+
for (const ws of wordSets) {
|
|
1521
|
+
for (const w of ws) {
|
|
1522
|
+
termPageCount.set(w, (termPageCount.get(w) || 0) + 1);
|
|
1523
|
+
}
|
|
1524
|
+
}
|
|
1525
|
+
const commonTermThreshold = Math.max(3, pages.length * 0.4);
|
|
1526
|
+
const siteThemeTerms = /* @__PURE__ */ new Set();
|
|
1527
|
+
for (const [term, count] of termPageCount) {
|
|
1528
|
+
if (count >= commonTermThreshold) siteThemeTerms.add(term);
|
|
1529
|
+
}
|
|
1530
|
+
const filteredSets = wordSets.map((ws) => {
|
|
1531
|
+
const filtered = /* @__PURE__ */ new Set();
|
|
1532
|
+
for (const w of ws) {
|
|
1533
|
+
if (!siteThemeTerms.has(w)) filtered.add(w);
|
|
1534
|
+
}
|
|
1535
|
+
return filtered;
|
|
1536
|
+
});
|
|
1513
1537
|
const cannibalPairs = [];
|
|
1514
1538
|
for (let i = 0; i < pages.length; i++) {
|
|
1515
1539
|
for (let j = i + 1; j < pages.length; j++) {
|
|
1516
|
-
|
|
1540
|
+
if (filteredSets[i].size === 0 && filteredSets[j].size === 0) continue;
|
|
1541
|
+
const sim = jaccardSimilarity(filteredSets[i], filteredSets[j]);
|
|
1517
1542
|
if (sim > 0.6) {
|
|
1518
1543
|
cannibalPairs.push({
|
|
1519
1544
|
urlA: pageTitles[i].url.slice(0, 60),
|
|
@@ -1523,23 +1548,39 @@ function checkContentCannibalization(data) {
|
|
|
1523
1548
|
}
|
|
1524
1549
|
}
|
|
1525
1550
|
}
|
|
1551
|
+
const cannibalUrls = /* @__PURE__ */ new Set();
|
|
1552
|
+
for (const pair of cannibalPairs) {
|
|
1553
|
+
cannibalUrls.add(pair.urlA);
|
|
1554
|
+
cannibalUrls.add(pair.urlB);
|
|
1555
|
+
}
|
|
1556
|
+
const cannibalRatio = pages.length > 0 ? cannibalUrls.size / pages.length : 0;
|
|
1526
1557
|
let score;
|
|
1527
1558
|
if (cannibalPairs.length === 0) {
|
|
1528
1559
|
score = 10;
|
|
1529
1560
|
findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no content cannibalization detected` });
|
|
1530
|
-
} else if (
|
|
1531
|
-
score =
|
|
1532
|
-
findings.push({ severity: "
|
|
1533
|
-
} else if (
|
|
1561
|
+
} else if (cannibalRatio <= 0.05) {
|
|
1562
|
+
score = 9;
|
|
1563
|
+
findings.push({ severity: "info", detail: `${cannibalPairs.length} pair(s) of pages with minor topic overlap (${cannibalUrls.size}/${pages.length} pages affected)` });
|
|
1564
|
+
} else if (cannibalRatio <= 0.1) {
|
|
1565
|
+
score = 7;
|
|
1566
|
+
findings.push({ severity: "low", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have overlapping topics`, fix: "Differentiate titles and H1 headings to reduce topic overlap" });
|
|
1567
|
+
} else if (cannibalRatio <= 0.2) {
|
|
1534
1568
|
score = 5;
|
|
1535
|
-
findings.push({ severity: "medium", detail: `${
|
|
1569
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for overlapping topics`, fix: "Consolidate overlapping pages or differentiate their titles and content focus" });
|
|
1570
|
+
} else if (cannibalRatio <= 0.4) {
|
|
1571
|
+
score = 3;
|
|
1572
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have significant content overlap`, fix: "Many pages compete for the same topics - consolidate or clearly differentiate them" });
|
|
1536
1573
|
} else {
|
|
1537
1574
|
score = 0;
|
|
1538
|
-
findings.push({ severity: "high", detail: `${
|
|
1575
|
+
findings.push({ severity: "high", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for the same topics`, fix: "Severe content cannibalization - consolidate overlapping pages or create clear topic differentiation" });
|
|
1539
1576
|
}
|
|
1540
1577
|
for (const pair of cannibalPairs.slice(0, 3)) {
|
|
1541
1578
|
findings.push({ severity: "low", detail: `Overlap (${pair.similarity}%): ${pair.urlA} vs ${pair.urlB}` });
|
|
1542
1579
|
}
|
|
1580
|
+
if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && score >= 8) {
|
|
1581
|
+
score = 6;
|
|
1582
|
+
findings.push({ severity: "low", detail: "Low topic overlap but content lacks coherent focus - not a strong signal for AI authority", fix: "Focus content on fewer core topics to build topical authority that AI engines can identify" });
|
|
1583
|
+
}
|
|
1543
1584
|
return { criterion: "content_cannibalization", criterion_label: "Content Cannibalization", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
|
|
1544
1585
|
}
|
|
1545
1586
|
function checkVisibleDateSignal(data) {
|
|
@@ -1765,7 +1806,233 @@ function extractRawDataSummary(data) {
|
|
|
1765
1806
|
crawl_skipped: data.crawlStats?.skipped ?? 0
|
|
1766
1807
|
};
|
|
1767
1808
|
}
|
|
1809
|
+
function getPageTopicText(html) {
|
|
1810
|
+
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
1811
|
+
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
1812
|
+
return [
|
|
1813
|
+
titleMatch?.[1] || "",
|
|
1814
|
+
h1Match?.[1]?.replace(/<[^>]*>/g, "") || ""
|
|
1815
|
+
].join(" ").toLowerCase().trim();
|
|
1816
|
+
}
|
|
1817
|
+
function extractBigrams(text) {
|
|
1818
|
+
const words = text.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
|
|
1819
|
+
const bigrams = [];
|
|
1820
|
+
for (let i = 0; i < words.length - 1; i++) {
|
|
1821
|
+
bigrams.push(words[i] + " " + words[i + 1]);
|
|
1822
|
+
}
|
|
1823
|
+
return bigrams;
|
|
1824
|
+
}
|
|
1825
|
+
function checkTopicCoherence(data) {
|
|
1826
|
+
const findings = [];
|
|
1827
|
+
if (!data.homepage) {
|
|
1828
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
1829
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 0, status: "not_found", findings, fix_priority: "P0" };
|
|
1830
|
+
}
|
|
1831
|
+
if (!data.blogSample || data.blogSample.length < 3) {
|
|
1832
|
+
findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for topic coherence analysis` });
|
|
1833
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 5, status: "partial", findings, fix_priority: "P2" };
|
|
1834
|
+
}
|
|
1835
|
+
const blogPages = data.blogSample;
|
|
1836
|
+
const domainBase = data.domain.replace(/^www\./, "").replace(/\.(com|org|net|io|co|ai)$/i, "").toLowerCase();
|
|
1837
|
+
const brandWords = /* @__PURE__ */ new Set();
|
|
1838
|
+
brandWords.add(domainBase);
|
|
1839
|
+
for (const part of domainBase.split(/[-_]/)) {
|
|
1840
|
+
if (part.length > 2) brandWords.add(part);
|
|
1841
|
+
}
|
|
1842
|
+
const rawTermFreq = /* @__PURE__ */ new Map();
|
|
1843
|
+
const pageTitleTexts = [];
|
|
1844
|
+
for (const page of blogPages) {
|
|
1845
|
+
const topicText = getPageTopicText(page.text);
|
|
1846
|
+
pageTitleTexts.push(topicText);
|
|
1847
|
+
const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
|
|
1848
|
+
const uniqueWords = new Set(words);
|
|
1849
|
+
for (const w of uniqueWords) {
|
|
1850
|
+
rawTermFreq.set(w, (rawTermFreq.get(w) || 0) + 1);
|
|
1851
|
+
}
|
|
1852
|
+
}
|
|
1853
|
+
for (const [term, count] of rawTermFreq) {
|
|
1854
|
+
if (count / blogPages.length >= 0.8 && domainBase.includes(term)) {
|
|
1855
|
+
brandWords.add(term);
|
|
1856
|
+
}
|
|
1857
|
+
}
|
|
1858
|
+
const termFreq = /* @__PURE__ */ new Map();
|
|
1859
|
+
for (const page of blogPages) {
|
|
1860
|
+
const topicText = getPageTopicText(page.text);
|
|
1861
|
+
const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w) && !brandWords.has(w));
|
|
1862
|
+
const uniqueWords = new Set(words);
|
|
1863
|
+
for (const w of uniqueWords) {
|
|
1864
|
+
termFreq.set(w, (termFreq.get(w) || 0) + 1);
|
|
1865
|
+
}
|
|
1866
|
+
}
|
|
1867
|
+
const sortedTerms = [...termFreq.entries()].sort((a, b) => b[1] - a[1]);
|
|
1868
|
+
const topTerm = sortedTerms[0];
|
|
1869
|
+
const bigramFreq = /* @__PURE__ */ new Map();
|
|
1870
|
+
const pageBigrams = [];
|
|
1871
|
+
for (const topicText of pageTitleTexts) {
|
|
1872
|
+
const bigrams = extractBigrams(topicText).filter((bg) => !bg.split(" ").some((w) => brandWords.has(w)));
|
|
1873
|
+
pageBigrams.push(bigrams);
|
|
1874
|
+
const uniqueBigrams = new Set(bigrams);
|
|
1875
|
+
for (const bg of uniqueBigrams) {
|
|
1876
|
+
bigramFreq.set(bg, (bigramFreq.get(bg) || 0) + 1);
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
const sortedBigrams = [...bigramFreq.entries()].sort((a, b) => b[1] - a[1]);
|
|
1880
|
+
const topBigram = sortedBigrams[0];
|
|
1881
|
+
const significantBigrams = sortedBigrams.filter(([, count]) => count >= 2);
|
|
1882
|
+
const clusterRoots = [];
|
|
1883
|
+
const assigned = /* @__PURE__ */ new Set();
|
|
1884
|
+
for (const [bg] of significantBigrams) {
|
|
1885
|
+
if (assigned.has(bg)) continue;
|
|
1886
|
+
clusterRoots.push(bg);
|
|
1887
|
+
assigned.add(bg);
|
|
1888
|
+
const [w1, w2] = bg.split(" ");
|
|
1889
|
+
for (const [otherBg] of significantBigrams) {
|
|
1890
|
+
if (assigned.has(otherBg)) continue;
|
|
1891
|
+
if (otherBg.includes(w1) || otherBg.includes(w2)) {
|
|
1892
|
+
assigned.add(otherBg);
|
|
1893
|
+
}
|
|
1894
|
+
}
|
|
1895
|
+
}
|
|
1896
|
+
const topicClusterCount = clusterRoots.length;
|
|
1897
|
+
const dominantTerm = topTerm?.[0] || "";
|
|
1898
|
+
const dominantTermCount = topTerm?.[1] || 0;
|
|
1899
|
+
const focusRatio = blogPages.length > 0 ? dominantTermCount / blogPages.length : 0;
|
|
1900
|
+
const dominantBigram = topBigram?.[0] || "";
|
|
1901
|
+
const dominantBigramCount = topBigram?.[1] || 0;
|
|
1902
|
+
const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
|
|
1903
|
+
let score = 0;
|
|
1904
|
+
const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
|
|
1905
|
+
if (bestFocusRatio >= 0.8) {
|
|
1906
|
+
score += 7;
|
|
1907
|
+
} else if (bestFocusRatio >= 0.6) {
|
|
1908
|
+
score += 6;
|
|
1909
|
+
} else if (bestFocusRatio >= 0.45) {
|
|
1910
|
+
score += 5;
|
|
1911
|
+
} else if (bestFocusRatio >= 0.3) {
|
|
1912
|
+
score += 3;
|
|
1913
|
+
} else if (bestFocusRatio >= 0.15) {
|
|
1914
|
+
score += 2;
|
|
1915
|
+
} else {
|
|
1916
|
+
score += 1;
|
|
1917
|
+
}
|
|
1918
|
+
const clusterPenaltyReduced = focusRatio >= 0.7;
|
|
1919
|
+
if (topicClusterCount <= 3) {
|
|
1920
|
+
score += 3;
|
|
1921
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
|
|
1922
|
+
} else if (topicClusterCount <= 6) {
|
|
1923
|
+
score += clusterPenaltyReduced ? 2 : 1;
|
|
1924
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
|
|
1925
|
+
} else if (topicClusterCount <= 10) {
|
|
1926
|
+
score += clusterPenaltyReduced ? 1 : 0;
|
|
1927
|
+
if (!clusterPenaltyReduced) {
|
|
1928
|
+
findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
|
|
1929
|
+
} else {
|
|
1930
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters but strong core topic focus (${Math.round(focusRatio * 100)}%)` });
|
|
1931
|
+
}
|
|
1932
|
+
} else {
|
|
1933
|
+
score += clusterPenaltyReduced ? 0 : -2;
|
|
1934
|
+
if (!clusterPenaltyReduced) {
|
|
1935
|
+
findings.push({ severity: "medium", detail: `${topicClusterCount} topic clusters - highly scattered content`, fix: "Content covers too many unrelated topics. AI engines cannot identify your expertise. Focus on your core niche." });
|
|
1936
|
+
} else {
|
|
1937
|
+
findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters despite strong core topic focus`, fix: "Consider narrowing subtopics within your niche for even stronger AI visibility." });
|
|
1938
|
+
}
|
|
1939
|
+
}
|
|
1940
|
+
score = Math.max(0, Math.min(10, score));
|
|
1941
|
+
if (dominantTerm) {
|
|
1942
|
+
const focusPct = Math.round(focusRatio * 100);
|
|
1943
|
+
findings.push({ severity: "info", detail: `Dominant topic term: "${dominantTerm}" (${focusPct}% of ${blogPages.length} pages)` });
|
|
1944
|
+
}
|
|
1945
|
+
if (dominantBigram && dominantBigramCount >= 2) {
|
|
1946
|
+
findings.push({ severity: "info", detail: `Dominant topic phrase: "${dominantBigram}" (${dominantBigramCount}/${blogPages.length} pages)` });
|
|
1947
|
+
}
|
|
1948
|
+
const offTopicExamples = [];
|
|
1949
|
+
for (let i = 0; i < pageTitleTexts.length && offTopicExamples.length < 3; i++) {
|
|
1950
|
+
if (dominantTerm && !pageTitleTexts[i].includes(dominantTerm)) {
|
|
1951
|
+
const title = blogPages[i].text.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1]?.trim();
|
|
1952
|
+
if (title && title.length > 3) offTopicExamples.push(title.slice(0, 60));
|
|
1953
|
+
}
|
|
1954
|
+
}
|
|
1955
|
+
if (offTopicExamples.length > 0 && score < 8) {
|
|
1956
|
+
findings.push({ severity: "low", detail: `Off-topic examples: ${offTopicExamples.join("; ")}` });
|
|
1957
|
+
}
|
|
1958
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P0" };
|
|
1959
|
+
}
|
|
1960
|
+
function countWords(html) {
|
|
1961
|
+
const text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
1962
|
+
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
1963
|
+
}
|
|
1964
|
+
function countHeadings(html) {
|
|
1965
|
+
const headings = html.match(/<h[2-6][^>]*>/gi) || [];
|
|
1966
|
+
return headings.length;
|
|
1967
|
+
}
|
|
1968
|
+
function checkContentDepth(data, topicCoherenceScore) {
|
|
1969
|
+
const findings = [];
|
|
1970
|
+
if (!data.blogSample || data.blogSample.length < 2) {
|
|
1971
|
+
findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for depth analysis` });
|
|
1972
|
+
return { criterion: "content_depth", criterion_label: "Content Depth", score: 3, status: "partial", findings, fix_priority: "P2" };
|
|
1973
|
+
}
|
|
1974
|
+
const blogPages = data.blogSample;
|
|
1975
|
+
const wordCounts = blogPages.map((p) => countWords(p.text));
|
|
1976
|
+
const headingCounts = blogPages.map((p) => countHeadings(p.text));
|
|
1977
|
+
const avgWords = wordCounts.reduce((a, b) => a + b, 0) / wordCounts.length;
|
|
1978
|
+
const avgHeadings = headingCounts.reduce((a, b) => a + b, 0) / headingCounts.length;
|
|
1979
|
+
const deepPages = wordCounts.filter((w) => w >= 1e3).length;
|
|
1980
|
+
const thinPages = wordCounts.filter((w) => w < 300).length;
|
|
1981
|
+
const deepRatio = deepPages / blogPages.length;
|
|
1982
|
+
const thinRatio = thinPages / blogPages.length;
|
|
1983
|
+
let score = 0;
|
|
1984
|
+
if (avgWords >= 2e3) {
|
|
1985
|
+
score += 5;
|
|
1986
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - excellent depth` });
|
|
1987
|
+
} else if (avgWords >= 1200) {
|
|
1988
|
+
score += 4;
|
|
1989
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - good depth` });
|
|
1990
|
+
} else if (avgWords >= 800) {
|
|
1991
|
+
score += 3;
|
|
1992
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page - moderate depth` });
|
|
1993
|
+
} else if (avgWords >= 400) {
|
|
1994
|
+
score += 2;
|
|
1995
|
+
findings.push({ severity: "low", detail: `Average ${Math.round(avgWords)} words per page - shallow content`, fix: "Expand articles with more detail, examples, and expert analysis to build AI citation authority" });
|
|
1996
|
+
} else {
|
|
1997
|
+
score += 1;
|
|
1998
|
+
findings.push({ severity: "medium", detail: `Average ${Math.round(avgWords)} words per page - very thin content`, fix: "Content is too thin for AI engines to cite. Aim for 1000+ words per article with structured sections." });
|
|
1999
|
+
}
|
|
2000
|
+
if (avgHeadings >= 8) {
|
|
2001
|
+
score += 3;
|
|
2002
|
+
findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - well-structured` });
|
|
2003
|
+
} else if (avgHeadings >= 5) {
|
|
2004
|
+
score += 2;
|
|
2005
|
+
findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - decent structure` });
|
|
2006
|
+
} else if (avgHeadings >= 2) {
|
|
2007
|
+
score += 1;
|
|
2008
|
+
findings.push({ severity: "low", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page`, fix: "Add more H2/H3 headings to break content into extractable sections" });
|
|
2009
|
+
} else {
|
|
2010
|
+
findings.push({ severity: "medium", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - minimal structure`, fix: "Add question-format H2/H3 headings so AI engines can extract specific answers" });
|
|
2011
|
+
}
|
|
2012
|
+
if (deepRatio >= 0.5) {
|
|
2013
|
+
score += 2;
|
|
2014
|
+
findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages (${Math.round(deepRatio * 100)}%) have 1000+ words` });
|
|
2015
|
+
} else if (deepRatio >= 0.25) {
|
|
2016
|
+
score += 1;
|
|
2017
|
+
findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages have 1000+ words` });
|
|
2018
|
+
}
|
|
2019
|
+
if (thinRatio >= 0.5) {
|
|
2020
|
+
score = Math.max(0, score - 2);
|
|
2021
|
+
findings.push({ severity: "medium", detail: `${thinPages}/${blogPages.length} pages (${Math.round(thinRatio * 100)}%) have under 300 words - high thin content ratio`, fix: "Remove or expand thin pages. Thin content dilutes site quality for AI engines." });
|
|
2022
|
+
} else if (thinRatio >= 0.25) {
|
|
2023
|
+
score = Math.max(0, score - 1);
|
|
2024
|
+
findings.push({ severity: "low", detail: `${thinPages}/${blogPages.length} pages have under 300 words` });
|
|
2025
|
+
}
|
|
2026
|
+
let finalScore = Math.min(10, score);
|
|
2027
|
+
if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && finalScore >= 8) {
|
|
2028
|
+
finalScore = 7;
|
|
2029
|
+
findings.push({ severity: "low", detail: "Deep content but low topic coherence - depth on scattered topics has reduced AI citation value", fix: "Focus content depth on your core expertise area for maximum AI visibility" });
|
|
2030
|
+
}
|
|
2031
|
+
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2032
|
+
}
|
|
1768
2033
|
function auditSiteFromData(data) {
|
|
2034
|
+
const topicCoherence = checkTopicCoherence(data);
|
|
2035
|
+
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
1769
2036
|
return [
|
|
1770
2037
|
checkLlmsTxt(data),
|
|
1771
2038
|
checkSchemaMarkup(data),
|
|
@@ -1791,52 +2058,84 @@ function auditSiteFromData(data) {
|
|
|
1791
2058
|
checkSchemaCoverage(data),
|
|
1792
2059
|
checkSpeakableSchema(data),
|
|
1793
2060
|
checkQueryAnswerAlignment(data),
|
|
1794
|
-
|
|
1795
|
-
checkVisibleDateSignal(data)
|
|
2061
|
+
cannibalization,
|
|
2062
|
+
checkVisibleDateSignal(data),
|
|
2063
|
+
topicCoherence,
|
|
2064
|
+
checkContentDepth(data, topicCoherence.score)
|
|
1796
2065
|
];
|
|
1797
2066
|
}
|
|
1798
2067
|
|
|
1799
2068
|
// src/scoring.ts
|
|
1800
2069
|
var WEIGHTS = {
|
|
1801
|
-
//
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
clean_html: 0.1,
|
|
1806
|
-
entity_consistency: 0.1,
|
|
1807
|
-
robots_txt: 0.05,
|
|
1808
|
-
faq_section: 0.1,
|
|
2070
|
+
// ─── Content Substance (~55%) ─────────────────────────────────────────────
|
|
2071
|
+
// WHY an AI engine would cite you. These drive citation quality directly.
|
|
2072
|
+
topic_coherence: 0.14,
|
|
2073
|
+
// Topical authority - THE gating signal
|
|
1809
2074
|
original_data: 0.1,
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
//
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
2075
|
+
// Unique value AI can't find elsewhere
|
|
2076
|
+
content_depth: 0.07,
|
|
2077
|
+
// Comprehensive vs thin coverage
|
|
2078
|
+
fact_density: 0.06,
|
|
2079
|
+
// Information density per page
|
|
2080
|
+
direct_answer_density: 0.05,
|
|
2081
|
+
// Direct answers to queries
|
|
2082
|
+
qa_content_format: 0.05,
|
|
2083
|
+
// Answer-shaped content structure
|
|
2084
|
+
query_answer_alignment: 0.05,
|
|
2085
|
+
// Relevance to actual AI queries
|
|
2086
|
+
faq_section: 0.04,
|
|
2087
|
+
// Structured Q&A pairs
|
|
2088
|
+
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2089
|
+
// HOW easily AI engines can extract and trust your content.
|
|
2090
|
+
entity_consistency: 0.05,
|
|
2091
|
+
// Brand authority and E-E-A-T
|
|
2092
|
+
internal_linking: 0.04,
|
|
2093
|
+
// Site structure and topic clusters
|
|
2094
|
+
content_freshness: 0.04,
|
|
2095
|
+
// Recency signals
|
|
2096
|
+
schema_markup: 0.03,
|
|
2097
|
+
// Structured data for discovery
|
|
2098
|
+
author_schema_depth: 0.03,
|
|
2099
|
+
// Expert attribution
|
|
2100
|
+
table_list_extractability: 0.03,
|
|
2101
|
+
// Extractable structured data
|
|
2102
|
+
definition_patterns: 0.02,
|
|
2103
|
+
// Clear definitions
|
|
2104
|
+
visible_date_signal: 0.02,
|
|
2105
|
+
// Publication date trust
|
|
2106
|
+
semantic_html: 0.02,
|
|
2107
|
+
// Clean semantic structure
|
|
2108
|
+
clean_html: 0.02,
|
|
2109
|
+
// Parseable markup
|
|
2110
|
+
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2111
|
+
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2112
|
+
content_cannibalization: 0.02,
|
|
2113
|
+
llms_txt: 0.02,
|
|
2114
|
+
robots_txt: 0.02,
|
|
2115
|
+
content_velocity: 0.02,
|
|
2116
|
+
content_licensing: 0.02,
|
|
2117
|
+
sitemap_completeness: 0.01,
|
|
2118
|
+
canonical_url: 0.01,
|
|
2119
|
+
rss_feed: 0.01,
|
|
2120
|
+
schema_coverage: 0.01,
|
|
2121
|
+
speakable_schema: 0.01
|
|
1829
2122
|
};
|
|
1830
2123
|
function calculateOverallScore(criteria) {
|
|
1831
2124
|
let totalWeight = 0;
|
|
1832
2125
|
let weightedSum = 0;
|
|
1833
2126
|
for (const c of criteria) {
|
|
1834
|
-
const weight = WEIGHTS[c.criterion] ?? 0.
|
|
2127
|
+
const weight = WEIGHTS[c.criterion] ?? 0.05;
|
|
1835
2128
|
weightedSum += c.score / 10 * weight * 100;
|
|
1836
2129
|
totalWeight += weight;
|
|
1837
2130
|
}
|
|
1838
2131
|
if (totalWeight === 0) return 0;
|
|
1839
|
-
|
|
2132
|
+
let score = Math.round(weightedSum / totalWeight);
|
|
2133
|
+
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2134
|
+
if (coherence && coherence.score < 6) {
|
|
2135
|
+
const cap2 = 35 + coherence.score * 5;
|
|
2136
|
+
score = Math.min(score, cap2);
|
|
2137
|
+
}
|
|
2138
|
+
return score;
|
|
1840
2139
|
}
|
|
1841
2140
|
|
|
1842
2141
|
// src/headless-fetch.ts
|
|
@@ -1950,7 +2249,9 @@ var CRITERION_LABELS = {
|
|
|
1950
2249
|
"Speakable Schema": "Speakable Schema",
|
|
1951
2250
|
"Query-Answer Alignment": "Query-Answer Alignment",
|
|
1952
2251
|
"Content Cannibalization": "Content Cannibalization",
|
|
1953
|
-
"Visible Date Signal": "Visible Date Signal"
|
|
2252
|
+
"Visible Date Signal": "Visible Date Signal",
|
|
2253
|
+
"Topic Coherence": "Topic Coherence",
|
|
2254
|
+
"Content Depth": "Content Depth"
|
|
1954
2255
|
};
|
|
1955
2256
|
function scoreToStatus(score) {
|
|
1956
2257
|
if (score === 0) return "MISSING";
|
|
@@ -2036,32 +2337,37 @@ function buildDetailedFindings(results) {
|
|
|
2036
2337
|
|
|
2037
2338
|
// src/narrative-generator.ts
|
|
2038
2339
|
var CRITERION_WEIGHTS = {
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
qa_content_format: 0.15,
|
|
2042
|
-
clean_html: 0.1,
|
|
2043
|
-
entity_consistency: 0.1,
|
|
2044
|
-
robots_txt: 0.05,
|
|
2045
|
-
faq_section: 0.1,
|
|
2340
|
+
// Content Substance (~55%)
|
|
2341
|
+
topic_coherence: 0.14,
|
|
2046
2342
|
original_data: 0.1,
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2343
|
+
content_depth: 0.07,
|
|
2344
|
+
fact_density: 0.06,
|
|
2345
|
+
direct_answer_density: 0.05,
|
|
2346
|
+
qa_content_format: 0.05,
|
|
2347
|
+
query_answer_alignment: 0.05,
|
|
2348
|
+
faq_section: 0.04,
|
|
2349
|
+
// Content Organization (~30%)
|
|
2350
|
+
entity_consistency: 0.05,
|
|
2351
|
+
internal_linking: 0.04,
|
|
2352
|
+
content_freshness: 0.04,
|
|
2353
|
+
schema_markup: 0.03,
|
|
2354
|
+
author_schema_depth: 0.03,
|
|
2355
|
+
table_list_extractability: 0.03,
|
|
2356
|
+
definition_patterns: 0.02,
|
|
2357
|
+
visible_date_signal: 0.02,
|
|
2358
|
+
semantic_html: 0.02,
|
|
2359
|
+
clean_html: 0.02,
|
|
2360
|
+
// Technical Plumbing (~15%)
|
|
2361
|
+
content_cannibalization: 0.02,
|
|
2362
|
+
llms_txt: 0.02,
|
|
2363
|
+
robots_txt: 0.02,
|
|
2364
|
+
content_velocity: 0.02,
|
|
2365
|
+
content_licensing: 0.02,
|
|
2366
|
+
sitemap_completeness: 0.01,
|
|
2367
|
+
canonical_url: 0.01,
|
|
2368
|
+
rss_feed: 0.01,
|
|
2369
|
+
schema_coverage: 0.01,
|
|
2370
|
+
speakable_schema: 0.01
|
|
2065
2371
|
};
|
|
2066
2372
|
var OPPORTUNITY_TEMPLATES = {
|
|
2067
2373
|
llms_txt: {
|
|
@@ -2193,6 +2499,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
2193
2499
|
name: "Add Visible Date Signals",
|
|
2194
2500
|
effort: "Low",
|
|
2195
2501
|
description: "Display publication/modification dates visibly using <time> elements and add datePublished/dateModified to JSON-LD schema."
|
|
2502
|
+
},
|
|
2503
|
+
topic_coherence: {
|
|
2504
|
+
name: "Focus Content on Core Topics",
|
|
2505
|
+
effort: "High",
|
|
2506
|
+
description: 'Ensure blog content consistently covers your core expertise areas rather than scattering across unrelated topics. AI engines build authority models - a site about "Medicare coverage" that also publishes about humidifiers and groceries dilutes its topical authority.'
|
|
2507
|
+
},
|
|
2508
|
+
content_depth: {
|
|
2509
|
+
name: "Increase Content Depth",
|
|
2510
|
+
effort: "Medium",
|
|
2511
|
+
description: "Expand articles to 1000+ words with structured H2/H3 sections, comparison tables, and expert analysis. Thin content (under 300 words) is rarely cited by AI engines. Deep, well-structured articles demonstrate expertise."
|
|
2196
2512
|
}
|
|
2197
2513
|
};
|
|
2198
2514
|
function calculateImpact(score, weight, effort) {
|
|
@@ -2314,7 +2630,7 @@ function generatePitchNumbers(score, rawData, scorecard) {
|
|
|
2314
2630
|
const passing = scorecard.filter((s) => s.score >= 7).length;
|
|
2315
2631
|
metrics.push({
|
|
2316
2632
|
metric: "Criteria Passing",
|
|
2317
|
-
value: `${passing}/
|
|
2633
|
+
value: `${passing}/28`,
|
|
2318
2634
|
significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${26 - passing} criteria need attention for full AI visibility`
|
|
2319
2635
|
});
|
|
2320
2636
|
return metrics;
|
|
@@ -2506,20 +2822,23 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
2506
2822
|
|
|
2507
2823
|
// src/page-scorer.ts
|
|
2508
2824
|
var PAGE_CRITERIA = {
|
|
2509
|
-
|
|
2510
|
-
qa_content_format: { weight: 0.15, label: "Q&A Content Format" },
|
|
2511
|
-
clean_html: { weight: 0.1, label: "Clean, Crawlable HTML" },
|
|
2512
|
-
faq_section: { weight: 0.1, label: "FAQ Section Content" },
|
|
2825
|
+
// Content Substance
|
|
2513
2826
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2827
|
+
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
2828
|
+
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
2829
|
+
qa_content_format: { weight: 0.05, label: "Q&A Content Format" },
|
|
2830
|
+
query_answer_alignment: { weight: 0.05, label: "Query-Answer Alignment" },
|
|
2831
|
+
faq_section: { weight: 0.04, label: "FAQ Section Content" },
|
|
2832
|
+
// Content Organization
|
|
2833
|
+
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
2834
|
+
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
2835
|
+
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
2836
|
+
definition_patterns: { weight: 0.02, label: "Definition Patterns" },
|
|
2837
|
+
visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
|
|
2838
|
+
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
2839
|
+
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
2840
|
+
// Technical Plumbing
|
|
2841
|
+
canonical_url: { weight: 0.01, label: "Canonical URL Strategy" }
|
|
2523
2842
|
};
|
|
2524
2843
|
function extractJsonLdBlocks(html) {
|
|
2525
2844
|
const blocks = [];
|
|
@@ -2845,7 +3164,7 @@ function extractTitle(html) {
|
|
|
2845
3164
|
function getTextContent2(html) {
|
|
2846
3165
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
2847
3166
|
}
|
|
2848
|
-
function
|
|
3167
|
+
function countWords2(text) {
|
|
2849
3168
|
if (!text) return 0;
|
|
2850
3169
|
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
2851
3170
|
}
|
|
@@ -2996,7 +3315,7 @@ function checkHasQuestionHeadings(html) {
|
|
|
2996
3315
|
function analyzePage(html, url, category) {
|
|
2997
3316
|
const title = extractTitle(html);
|
|
2998
3317
|
const textContent = getTextContent2(html);
|
|
2999
|
-
const wordCount =
|
|
3318
|
+
const wordCount = countWords2(textContent);
|
|
3000
3319
|
const issues = [];
|
|
3001
3320
|
const strengths = [];
|
|
3002
3321
|
const issueChecks = [
|
|
@@ -3076,7 +3395,7 @@ async function audit(domain, options) {
|
|
|
3076
3395
|
}
|
|
3077
3396
|
}
|
|
3078
3397
|
if (options?.fullCrawl) {
|
|
3079
|
-
const { crawlFullSite } = await import("./full-site-crawler-
|
|
3398
|
+
const { crawlFullSite } = await import("./full-site-crawler-FQYO46YV.js");
|
|
3080
3399
|
const crawlResult = await crawlFullSite(siteData, {
|
|
3081
3400
|
maxPages: options.maxPages ?? 200,
|
|
3082
3401
|
concurrency: options.concurrency ?? 5
|