aeorank 1.6.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -39
- package/dist/browser.d.ts +2 -2
- package/dist/browser.js +500 -125
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-3IJISYWT.js → chunk-PKJIKMLV.js} +2 -2
- package/dist/chunk-PKJIKMLV.js.map +1 -0
- package/dist/cli.js +415 -96
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-F7J2HRL4.js → full-site-crawler-FQYO46YV.js} +2 -2
- package/dist/full-site-crawler-FQYO46YV.js.map +1 -0
- package/dist/{full-site-crawler-VFARFR2C.js → full-site-crawler-UIOMKOZA.js} +2 -2
- package/dist/index.cjs +499 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +500 -125
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-3IJISYWT.js.map +0 -1
- package/dist/full-site-crawler-F7J2HRL4.js.map +0 -1
- /package/dist/{full-site-crawler-VFARFR2C.js.map → full-site-crawler-UIOMKOZA.js.map} +0 -0
package/dist/browser.js
CHANGED
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
extractAllUrlsFromSitemap,
|
|
4
4
|
extractInternalLinks,
|
|
5
5
|
inferCategory
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-PKJIKMLV.js";
|
|
7
7
|
|
|
8
8
|
// src/parked-domain.ts
|
|
9
9
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
@@ -195,7 +195,7 @@ async function prefetchSiteData(domain) {
|
|
|
195
195
|
sitemapForBlog = subSitemap.text;
|
|
196
196
|
}
|
|
197
197
|
}
|
|
198
|
-
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain,
|
|
198
|
+
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
199
199
|
if (blogUrls.length > 0) {
|
|
200
200
|
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
201
201
|
blogSample = fetched.filter(
|
|
@@ -552,15 +552,17 @@ function checkOriginalData(data) {
|
|
|
552
552
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
553
553
|
return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
554
554
|
}
|
|
555
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
555
556
|
const html = data.homepage.text;
|
|
556
|
-
const
|
|
557
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
558
|
+
const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
|
|
557
559
|
let score = 0;
|
|
558
560
|
const statPatterns = /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i;
|
|
559
|
-
if (statPatterns.test(
|
|
561
|
+
if (statPatterns.test(allText)) {
|
|
560
562
|
const researchContext = /\b(our\s+(?:study|analysis|research|data|survey|findings|report)|we\s+(?:surveyed|analyzed|studied|measured|tracked)|proprietary|methodology|original\s+research)\b/i;
|
|
561
|
-
if (researchContext.test(
|
|
563
|
+
if (researchContext.test(allText)) {
|
|
562
564
|
score += 3;
|
|
563
|
-
findings.push({ severity: "info", detail: "Proprietary statistics with research context found
|
|
565
|
+
findings.push({ severity: "info", detail: "Proprietary statistics with research context found" });
|
|
564
566
|
} else {
|
|
565
567
|
score += 1;
|
|
566
568
|
findings.push({ severity: "low", detail: 'Statistics found but without research context (e.g., "500+ clients")', fix: 'Add context about your methodology: "Our analysis of X found..." or "We surveyed Y..."' });
|
|
@@ -1075,20 +1077,24 @@ function checkFactDensity(data) {
|
|
|
1075
1077
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
1076
1078
|
return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
1077
1079
|
}
|
|
1078
|
-
const
|
|
1080
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
1081
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
1082
|
+
const text = allText;
|
|
1083
|
+
const pageCount = allPages.length;
|
|
1079
1084
|
let score = 0;
|
|
1080
1085
|
const dataPoints = text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD|\s*EUR)/g) || [];
|
|
1081
1086
|
const countPhrases = text.match(/\d+(?:,\d{3})*\+?\s+(?:users?|clients?|customers?|companies|businesses|patients?|members?|employees?|projects?|downloads?)/gi) || [];
|
|
1082
1087
|
const totalDataPoints = dataPoints.length + countPhrases.length;
|
|
1083
|
-
|
|
1088
|
+
const avgPerPage = pageCount > 0 ? totalDataPoints / pageCount : 0;
|
|
1089
|
+
if (avgPerPage >= 4) {
|
|
1084
1090
|
score += 5;
|
|
1085
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found
|
|
1086
|
-
} else if (
|
|
1091
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages (avg ${avgPerPage.toFixed(1)}/page)` });
|
|
1092
|
+
} else if (avgPerPage >= 2) {
|
|
1087
1093
|
score += 3;
|
|
1088
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found` });
|
|
1094
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages` });
|
|
1089
1095
|
} else if (totalDataPoints >= 1) {
|
|
1090
1096
|
score += 1;
|
|
1091
|
-
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1097
|
+
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found across ${pageCount} pages`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1092
1098
|
} else {
|
|
1093
1099
|
findings.push({ severity: "high", detail: "No quantitative data points found", fix: "Add specific statistics (percentages, counts, comparisons) that AI engines can cite" });
|
|
1094
1100
|
}
|
|
@@ -1194,9 +1200,9 @@ function countRecentSitemapDates(sitemapText) {
|
|
|
1194
1200
|
distinctRecentDays: recentDays.size
|
|
1195
1201
|
};
|
|
1196
1202
|
}
|
|
1197
|
-
var BLOG_PATH_PATTERNS = /\/(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1203
|
+
var BLOG_PATH_PATTERNS = /\/(?:[^/]*-?)?(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1198
1204
|
var EXCLUDE_PATH_PATTERNS = /\/(?:tag|category|author|page|feed|wp-content|wp-admin|wp-json|cart|checkout|login|search|api|static|assets|_next)\b/i;
|
|
1199
|
-
function extractBlogUrlsFromSitemap(sitemapText, domain, limit =
|
|
1205
|
+
function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
1200
1206
|
const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
|
|
1201
1207
|
const candidates = [];
|
|
1202
1208
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -1492,7 +1498,7 @@ function jaccardSimilarity(a, b) {
|
|
|
1492
1498
|
const union = a.size + b.size - intersection;
|
|
1493
1499
|
return union === 0 ? 0 : intersection / union;
|
|
1494
1500
|
}
|
|
1495
|
-
function checkContentCannibalization(data) {
|
|
1501
|
+
function checkContentCannibalization(data, topicCoherenceScore) {
|
|
1496
1502
|
const findings = [];
|
|
1497
1503
|
if (!data.homepage) {
|
|
1498
1504
|
findings.push({ severity: "critical", detail: "No homepage available for cannibalization analysis" });
|
|
@@ -1502,7 +1508,7 @@ function checkContentCannibalization(data) {
|
|
|
1502
1508
|
{ html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` }
|
|
1503
1509
|
];
|
|
1504
1510
|
if (data.blogSample) {
|
|
1505
|
-
for (const page of data.blogSample
|
|
1511
|
+
for (const page of data.blogSample) {
|
|
1506
1512
|
pages.push({ html: page.text, url: page.finalUrl || "" });
|
|
1507
1513
|
}
|
|
1508
1514
|
}
|
|
@@ -1512,10 +1518,29 @@ function checkContentCannibalization(data) {
|
|
|
1512
1518
|
}
|
|
1513
1519
|
const pageTitles = pages.map((p) => ({ title: extractPageTitle(p.html), url: p.url }));
|
|
1514
1520
|
const wordSets = pageTitles.map((p) => titleToWordSet(p.title));
|
|
1521
|
+
const termPageCount = /* @__PURE__ */ new Map();
|
|
1522
|
+
for (const ws of wordSets) {
|
|
1523
|
+
for (const w of ws) {
|
|
1524
|
+
termPageCount.set(w, (termPageCount.get(w) || 0) + 1);
|
|
1525
|
+
}
|
|
1526
|
+
}
|
|
1527
|
+
const commonTermThreshold = Math.max(3, pages.length * 0.4);
|
|
1528
|
+
const siteThemeTerms = /* @__PURE__ */ new Set();
|
|
1529
|
+
for (const [term, count] of termPageCount) {
|
|
1530
|
+
if (count >= commonTermThreshold) siteThemeTerms.add(term);
|
|
1531
|
+
}
|
|
1532
|
+
const filteredSets = wordSets.map((ws) => {
|
|
1533
|
+
const filtered = /* @__PURE__ */ new Set();
|
|
1534
|
+
for (const w of ws) {
|
|
1535
|
+
if (!siteThemeTerms.has(w)) filtered.add(w);
|
|
1536
|
+
}
|
|
1537
|
+
return filtered;
|
|
1538
|
+
});
|
|
1515
1539
|
const cannibalPairs = [];
|
|
1516
1540
|
for (let i = 0; i < pages.length; i++) {
|
|
1517
1541
|
for (let j = i + 1; j < pages.length; j++) {
|
|
1518
|
-
|
|
1542
|
+
if (filteredSets[i].size === 0 && filteredSets[j].size === 0) continue;
|
|
1543
|
+
const sim = jaccardSimilarity(filteredSets[i], filteredSets[j]);
|
|
1519
1544
|
if (sim > 0.6) {
|
|
1520
1545
|
cannibalPairs.push({
|
|
1521
1546
|
urlA: pageTitles[i].url.slice(0, 60),
|
|
@@ -1525,23 +1550,39 @@ function checkContentCannibalization(data) {
|
|
|
1525
1550
|
}
|
|
1526
1551
|
}
|
|
1527
1552
|
}
|
|
1553
|
+
const cannibalUrls = /* @__PURE__ */ new Set();
|
|
1554
|
+
for (const pair of cannibalPairs) {
|
|
1555
|
+
cannibalUrls.add(pair.urlA);
|
|
1556
|
+
cannibalUrls.add(pair.urlB);
|
|
1557
|
+
}
|
|
1558
|
+
const cannibalRatio = pages.length > 0 ? cannibalUrls.size / pages.length : 0;
|
|
1528
1559
|
let score;
|
|
1529
1560
|
if (cannibalPairs.length === 0) {
|
|
1530
1561
|
score = 10;
|
|
1531
1562
|
findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no content cannibalization detected` });
|
|
1532
|
-
} else if (
|
|
1533
|
-
score =
|
|
1534
|
-
findings.push({ severity: "
|
|
1535
|
-
} else if (
|
|
1563
|
+
} else if (cannibalRatio <= 0.05) {
|
|
1564
|
+
score = 9;
|
|
1565
|
+
findings.push({ severity: "info", detail: `${cannibalPairs.length} pair(s) of pages with minor topic overlap (${cannibalUrls.size}/${pages.length} pages affected)` });
|
|
1566
|
+
} else if (cannibalRatio <= 0.1) {
|
|
1567
|
+
score = 7;
|
|
1568
|
+
findings.push({ severity: "low", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have overlapping topics`, fix: "Differentiate titles and H1 headings to reduce topic overlap" });
|
|
1569
|
+
} else if (cannibalRatio <= 0.2) {
|
|
1536
1570
|
score = 5;
|
|
1537
|
-
findings.push({ severity: "medium", detail: `${
|
|
1571
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for overlapping topics`, fix: "Consolidate overlapping pages or differentiate their titles and content focus" });
|
|
1572
|
+
} else if (cannibalRatio <= 0.4) {
|
|
1573
|
+
score = 3;
|
|
1574
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have significant content overlap`, fix: "Many pages compete for the same topics - consolidate or clearly differentiate them" });
|
|
1538
1575
|
} else {
|
|
1539
1576
|
score = 0;
|
|
1540
|
-
findings.push({ severity: "high", detail: `${
|
|
1577
|
+
findings.push({ severity: "high", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for the same topics`, fix: "Severe content cannibalization - consolidate overlapping pages or create clear topic differentiation" });
|
|
1541
1578
|
}
|
|
1542
1579
|
for (const pair of cannibalPairs.slice(0, 3)) {
|
|
1543
1580
|
findings.push({ severity: "low", detail: `Overlap (${pair.similarity}%): ${pair.urlA} vs ${pair.urlB}` });
|
|
1544
1581
|
}
|
|
1582
|
+
if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && score >= 8) {
|
|
1583
|
+
score = 6;
|
|
1584
|
+
findings.push({ severity: "low", detail: "Low topic overlap but content lacks coherent focus - not a strong signal for AI authority", fix: "Focus content on fewer core topics to build topical authority that AI engines can identify" });
|
|
1585
|
+
}
|
|
1545
1586
|
return { criterion: "content_cannibalization", criterion_label: "Content Cannibalization", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
|
|
1546
1587
|
}
|
|
1547
1588
|
function checkVisibleDateSignal(data) {
|
|
@@ -1767,7 +1808,233 @@ function extractRawDataSummary(data) {
|
|
|
1767
1808
|
crawl_skipped: data.crawlStats?.skipped ?? 0
|
|
1768
1809
|
};
|
|
1769
1810
|
}
|
|
1811
|
+
function getPageTopicText(html) {
|
|
1812
|
+
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
1813
|
+
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
1814
|
+
return [
|
|
1815
|
+
titleMatch?.[1] || "",
|
|
1816
|
+
h1Match?.[1]?.replace(/<[^>]*>/g, "") || ""
|
|
1817
|
+
].join(" ").toLowerCase().trim();
|
|
1818
|
+
}
|
|
1819
|
+
function extractBigrams(text) {
|
|
1820
|
+
const words = text.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
|
|
1821
|
+
const bigrams = [];
|
|
1822
|
+
for (let i = 0; i < words.length - 1; i++) {
|
|
1823
|
+
bigrams.push(words[i] + " " + words[i + 1]);
|
|
1824
|
+
}
|
|
1825
|
+
return bigrams;
|
|
1826
|
+
}
|
|
1827
|
+
function checkTopicCoherence(data) {
|
|
1828
|
+
const findings = [];
|
|
1829
|
+
if (!data.homepage) {
|
|
1830
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
1831
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 0, status: "not_found", findings, fix_priority: "P0" };
|
|
1832
|
+
}
|
|
1833
|
+
if (!data.blogSample || data.blogSample.length < 3) {
|
|
1834
|
+
findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for topic coherence analysis` });
|
|
1835
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 5, status: "partial", findings, fix_priority: "P2" };
|
|
1836
|
+
}
|
|
1837
|
+
const blogPages = data.blogSample;
|
|
1838
|
+
const domainBase = data.domain.replace(/^www\./, "").replace(/\.(com|org|net|io|co|ai)$/i, "").toLowerCase();
|
|
1839
|
+
const brandWords = /* @__PURE__ */ new Set();
|
|
1840
|
+
brandWords.add(domainBase);
|
|
1841
|
+
for (const part of domainBase.split(/[-_]/)) {
|
|
1842
|
+
if (part.length > 2) brandWords.add(part);
|
|
1843
|
+
}
|
|
1844
|
+
const rawTermFreq = /* @__PURE__ */ new Map();
|
|
1845
|
+
const pageTitleTexts = [];
|
|
1846
|
+
for (const page of blogPages) {
|
|
1847
|
+
const topicText = getPageTopicText(page.text);
|
|
1848
|
+
pageTitleTexts.push(topicText);
|
|
1849
|
+
const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
|
|
1850
|
+
const uniqueWords = new Set(words);
|
|
1851
|
+
for (const w of uniqueWords) {
|
|
1852
|
+
rawTermFreq.set(w, (rawTermFreq.get(w) || 0) + 1);
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
for (const [term, count] of rawTermFreq) {
|
|
1856
|
+
if (count / blogPages.length >= 0.8 && domainBase.includes(term)) {
|
|
1857
|
+
brandWords.add(term);
|
|
1858
|
+
}
|
|
1859
|
+
}
|
|
1860
|
+
const termFreq = /* @__PURE__ */ new Map();
|
|
1861
|
+
for (const page of blogPages) {
|
|
1862
|
+
const topicText = getPageTopicText(page.text);
|
|
1863
|
+
const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w) && !brandWords.has(w));
|
|
1864
|
+
const uniqueWords = new Set(words);
|
|
1865
|
+
for (const w of uniqueWords) {
|
|
1866
|
+
termFreq.set(w, (termFreq.get(w) || 0) + 1);
|
|
1867
|
+
}
|
|
1868
|
+
}
|
|
1869
|
+
const sortedTerms = [...termFreq.entries()].sort((a, b) => b[1] - a[1]);
|
|
1870
|
+
const topTerm = sortedTerms[0];
|
|
1871
|
+
const bigramFreq = /* @__PURE__ */ new Map();
|
|
1872
|
+
const pageBigrams = [];
|
|
1873
|
+
for (const topicText of pageTitleTexts) {
|
|
1874
|
+
const bigrams = extractBigrams(topicText).filter((bg) => !bg.split(" ").some((w) => brandWords.has(w)));
|
|
1875
|
+
pageBigrams.push(bigrams);
|
|
1876
|
+
const uniqueBigrams = new Set(bigrams);
|
|
1877
|
+
for (const bg of uniqueBigrams) {
|
|
1878
|
+
bigramFreq.set(bg, (bigramFreq.get(bg) || 0) + 1);
|
|
1879
|
+
}
|
|
1880
|
+
}
|
|
1881
|
+
const sortedBigrams = [...bigramFreq.entries()].sort((a, b) => b[1] - a[1]);
|
|
1882
|
+
const topBigram = sortedBigrams[0];
|
|
1883
|
+
const significantBigrams = sortedBigrams.filter(([, count]) => count >= 2);
|
|
1884
|
+
const clusterRoots = [];
|
|
1885
|
+
const assigned = /* @__PURE__ */ new Set();
|
|
1886
|
+
for (const [bg] of significantBigrams) {
|
|
1887
|
+
if (assigned.has(bg)) continue;
|
|
1888
|
+
clusterRoots.push(bg);
|
|
1889
|
+
assigned.add(bg);
|
|
1890
|
+
const [w1, w2] = bg.split(" ");
|
|
1891
|
+
for (const [otherBg] of significantBigrams) {
|
|
1892
|
+
if (assigned.has(otherBg)) continue;
|
|
1893
|
+
if (otherBg.includes(w1) || otherBg.includes(w2)) {
|
|
1894
|
+
assigned.add(otherBg);
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
const topicClusterCount = clusterRoots.length;
|
|
1899
|
+
const dominantTerm = topTerm?.[0] || "";
|
|
1900
|
+
const dominantTermCount = topTerm?.[1] || 0;
|
|
1901
|
+
const focusRatio = blogPages.length > 0 ? dominantTermCount / blogPages.length : 0;
|
|
1902
|
+
const dominantBigram = topBigram?.[0] || "";
|
|
1903
|
+
const dominantBigramCount = topBigram?.[1] || 0;
|
|
1904
|
+
const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
|
|
1905
|
+
let score = 0;
|
|
1906
|
+
const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
|
|
1907
|
+
if (bestFocusRatio >= 0.8) {
|
|
1908
|
+
score += 7;
|
|
1909
|
+
} else if (bestFocusRatio >= 0.6) {
|
|
1910
|
+
score += 6;
|
|
1911
|
+
} else if (bestFocusRatio >= 0.45) {
|
|
1912
|
+
score += 5;
|
|
1913
|
+
} else if (bestFocusRatio >= 0.3) {
|
|
1914
|
+
score += 3;
|
|
1915
|
+
} else if (bestFocusRatio >= 0.15) {
|
|
1916
|
+
score += 2;
|
|
1917
|
+
} else {
|
|
1918
|
+
score += 1;
|
|
1919
|
+
}
|
|
1920
|
+
const clusterPenaltyReduced = focusRatio >= 0.7;
|
|
1921
|
+
if (topicClusterCount <= 3) {
|
|
1922
|
+
score += 3;
|
|
1923
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
|
|
1924
|
+
} else if (topicClusterCount <= 6) {
|
|
1925
|
+
score += clusterPenaltyReduced ? 2 : 1;
|
|
1926
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
|
|
1927
|
+
} else if (topicClusterCount <= 10) {
|
|
1928
|
+
score += clusterPenaltyReduced ? 1 : 0;
|
|
1929
|
+
if (!clusterPenaltyReduced) {
|
|
1930
|
+
findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
|
|
1931
|
+
} else {
|
|
1932
|
+
findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters but strong core topic focus (${Math.round(focusRatio * 100)}%)` });
|
|
1933
|
+
}
|
|
1934
|
+
} else {
|
|
1935
|
+
score += clusterPenaltyReduced ? 0 : -2;
|
|
1936
|
+
if (!clusterPenaltyReduced) {
|
|
1937
|
+
findings.push({ severity: "medium", detail: `${topicClusterCount} topic clusters - highly scattered content`, fix: "Content covers too many unrelated topics. AI engines cannot identify your expertise. Focus on your core niche." });
|
|
1938
|
+
} else {
|
|
1939
|
+
findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters despite strong core topic focus`, fix: "Consider narrowing subtopics within your niche for even stronger AI visibility." });
|
|
1940
|
+
}
|
|
1941
|
+
}
|
|
1942
|
+
score = Math.max(0, Math.min(10, score));
|
|
1943
|
+
if (dominantTerm) {
|
|
1944
|
+
const focusPct = Math.round(focusRatio * 100);
|
|
1945
|
+
findings.push({ severity: "info", detail: `Dominant topic term: "${dominantTerm}" (${focusPct}% of ${blogPages.length} pages)` });
|
|
1946
|
+
}
|
|
1947
|
+
if (dominantBigram && dominantBigramCount >= 2) {
|
|
1948
|
+
findings.push({ severity: "info", detail: `Dominant topic phrase: "${dominantBigram}" (${dominantBigramCount}/${blogPages.length} pages)` });
|
|
1949
|
+
}
|
|
1950
|
+
const offTopicExamples = [];
|
|
1951
|
+
for (let i = 0; i < pageTitleTexts.length && offTopicExamples.length < 3; i++) {
|
|
1952
|
+
if (dominantTerm && !pageTitleTexts[i].includes(dominantTerm)) {
|
|
1953
|
+
const title = blogPages[i].text.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1]?.trim();
|
|
1954
|
+
if (title && title.length > 3) offTopicExamples.push(title.slice(0, 60));
|
|
1955
|
+
}
|
|
1956
|
+
}
|
|
1957
|
+
if (offTopicExamples.length > 0 && score < 8) {
|
|
1958
|
+
findings.push({ severity: "low", detail: `Off-topic examples: ${offTopicExamples.join("; ")}` });
|
|
1959
|
+
}
|
|
1960
|
+
return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P0" };
|
|
1961
|
+
}
|
|
1962
|
+
function countWords(html) {
|
|
1963
|
+
const text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
1964
|
+
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
1965
|
+
}
|
|
1966
|
+
function countHeadings(html) {
|
|
1967
|
+
const headings = html.match(/<h[2-6][^>]*>/gi) || [];
|
|
1968
|
+
return headings.length;
|
|
1969
|
+
}
|
|
1970
|
+
function checkContentDepth(data, topicCoherenceScore) {
|
|
1971
|
+
const findings = [];
|
|
1972
|
+
if (!data.blogSample || data.blogSample.length < 2) {
|
|
1973
|
+
findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for depth analysis` });
|
|
1974
|
+
return { criterion: "content_depth", criterion_label: "Content Depth", score: 3, status: "partial", findings, fix_priority: "P2" };
|
|
1975
|
+
}
|
|
1976
|
+
const blogPages = data.blogSample;
|
|
1977
|
+
const wordCounts = blogPages.map((p) => countWords(p.text));
|
|
1978
|
+
const headingCounts = blogPages.map((p) => countHeadings(p.text));
|
|
1979
|
+
const avgWords = wordCounts.reduce((a, b) => a + b, 0) / wordCounts.length;
|
|
1980
|
+
const avgHeadings = headingCounts.reduce((a, b) => a + b, 0) / headingCounts.length;
|
|
1981
|
+
const deepPages = wordCounts.filter((w) => w >= 1e3).length;
|
|
1982
|
+
const thinPages = wordCounts.filter((w) => w < 300).length;
|
|
1983
|
+
const deepRatio = deepPages / blogPages.length;
|
|
1984
|
+
const thinRatio = thinPages / blogPages.length;
|
|
1985
|
+
let score = 0;
|
|
1986
|
+
if (avgWords >= 2e3) {
|
|
1987
|
+
score += 5;
|
|
1988
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - excellent depth` });
|
|
1989
|
+
} else if (avgWords >= 1200) {
|
|
1990
|
+
score += 4;
|
|
1991
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - good depth` });
|
|
1992
|
+
} else if (avgWords >= 800) {
|
|
1993
|
+
score += 3;
|
|
1994
|
+
findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page - moderate depth` });
|
|
1995
|
+
} else if (avgWords >= 400) {
|
|
1996
|
+
score += 2;
|
|
1997
|
+
findings.push({ severity: "low", detail: `Average ${Math.round(avgWords)} words per page - shallow content`, fix: "Expand articles with more detail, examples, and expert analysis to build AI citation authority" });
|
|
1998
|
+
} else {
|
|
1999
|
+
score += 1;
|
|
2000
|
+
findings.push({ severity: "medium", detail: `Average ${Math.round(avgWords)} words per page - very thin content`, fix: "Content is too thin for AI engines to cite. Aim for 1000+ words per article with structured sections." });
|
|
2001
|
+
}
|
|
2002
|
+
if (avgHeadings >= 8) {
|
|
2003
|
+
score += 3;
|
|
2004
|
+
findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - well-structured` });
|
|
2005
|
+
} else if (avgHeadings >= 5) {
|
|
2006
|
+
score += 2;
|
|
2007
|
+
findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - decent structure` });
|
|
2008
|
+
} else if (avgHeadings >= 2) {
|
|
2009
|
+
score += 1;
|
|
2010
|
+
findings.push({ severity: "low", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page`, fix: "Add more H2/H3 headings to break content into extractable sections" });
|
|
2011
|
+
} else {
|
|
2012
|
+
findings.push({ severity: "medium", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - minimal structure`, fix: "Add question-format H2/H3 headings so AI engines can extract specific answers" });
|
|
2013
|
+
}
|
|
2014
|
+
if (deepRatio >= 0.5) {
|
|
2015
|
+
score += 2;
|
|
2016
|
+
findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages (${Math.round(deepRatio * 100)}%) have 1000+ words` });
|
|
2017
|
+
} else if (deepRatio >= 0.25) {
|
|
2018
|
+
score += 1;
|
|
2019
|
+
findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages have 1000+ words` });
|
|
2020
|
+
}
|
|
2021
|
+
if (thinRatio >= 0.5) {
|
|
2022
|
+
score = Math.max(0, score - 2);
|
|
2023
|
+
findings.push({ severity: "medium", detail: `${thinPages}/${blogPages.length} pages (${Math.round(thinRatio * 100)}%) have under 300 words - high thin content ratio`, fix: "Remove or expand thin pages. Thin content dilutes site quality for AI engines." });
|
|
2024
|
+
} else if (thinRatio >= 0.25) {
|
|
2025
|
+
score = Math.max(0, score - 1);
|
|
2026
|
+
findings.push({ severity: "low", detail: `${thinPages}/${blogPages.length} pages have under 300 words` });
|
|
2027
|
+
}
|
|
2028
|
+
let finalScore = Math.min(10, score);
|
|
2029
|
+
if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && finalScore >= 8) {
|
|
2030
|
+
finalScore = 7;
|
|
2031
|
+
findings.push({ severity: "low", detail: "Deep content but low topic coherence - depth on scattered topics has reduced AI citation value", fix: "Focus content depth on your core expertise area for maximum AI visibility" });
|
|
2032
|
+
}
|
|
2033
|
+
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2034
|
+
}
|
|
1770
2035
|
function auditSiteFromData(data) {
|
|
2036
|
+
const topicCoherence = checkTopicCoherence(data);
|
|
2037
|
+
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
1771
2038
|
return [
|
|
1772
2039
|
checkLlmsTxt(data),
|
|
1773
2040
|
checkSchemaMarkup(data),
|
|
@@ -1793,52 +2060,84 @@ function auditSiteFromData(data) {
|
|
|
1793
2060
|
checkSchemaCoverage(data),
|
|
1794
2061
|
checkSpeakableSchema(data),
|
|
1795
2062
|
checkQueryAnswerAlignment(data),
|
|
1796
|
-
|
|
1797
|
-
checkVisibleDateSignal(data)
|
|
2063
|
+
cannibalization,
|
|
2064
|
+
checkVisibleDateSignal(data),
|
|
2065
|
+
topicCoherence,
|
|
2066
|
+
checkContentDepth(data, topicCoherence.score)
|
|
1798
2067
|
];
|
|
1799
2068
|
}
|
|
1800
2069
|
|
|
1801
2070
|
// src/scoring.ts
|
|
1802
2071
|
var WEIGHTS = {
|
|
1803
|
-
//
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
clean_html: 0.1,
|
|
1808
|
-
entity_consistency: 0.1,
|
|
1809
|
-
robots_txt: 0.05,
|
|
1810
|
-
faq_section: 0.1,
|
|
2072
|
+
// ─── Content Substance (~55%) ─────────────────────────────────────────────
|
|
2073
|
+
// WHY an AI engine would cite you. These drive citation quality directly.
|
|
2074
|
+
topic_coherence: 0.14,
|
|
2075
|
+
// Topical authority - THE gating signal
|
|
1811
2076
|
original_data: 0.1,
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
//
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
2077
|
+
// Unique value AI can't find elsewhere
|
|
2078
|
+
content_depth: 0.07,
|
|
2079
|
+
// Comprehensive vs thin coverage
|
|
2080
|
+
fact_density: 0.06,
|
|
2081
|
+
// Information density per page
|
|
2082
|
+
direct_answer_density: 0.05,
|
|
2083
|
+
// Direct answers to queries
|
|
2084
|
+
qa_content_format: 0.05,
|
|
2085
|
+
// Answer-shaped content structure
|
|
2086
|
+
query_answer_alignment: 0.05,
|
|
2087
|
+
// Relevance to actual AI queries
|
|
2088
|
+
faq_section: 0.04,
|
|
2089
|
+
// Structured Q&A pairs
|
|
2090
|
+
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2091
|
+
// HOW easily AI engines can extract and trust your content.
|
|
2092
|
+
entity_consistency: 0.05,
|
|
2093
|
+
// Brand authority and E-E-A-T
|
|
2094
|
+
internal_linking: 0.04,
|
|
2095
|
+
// Site structure and topic clusters
|
|
2096
|
+
content_freshness: 0.04,
|
|
2097
|
+
// Recency signals
|
|
2098
|
+
schema_markup: 0.03,
|
|
2099
|
+
// Structured data for discovery
|
|
2100
|
+
author_schema_depth: 0.03,
|
|
2101
|
+
// Expert attribution
|
|
2102
|
+
table_list_extractability: 0.03,
|
|
2103
|
+
// Extractable structured data
|
|
2104
|
+
definition_patterns: 0.02,
|
|
2105
|
+
// Clear definitions
|
|
2106
|
+
visible_date_signal: 0.02,
|
|
2107
|
+
// Publication date trust
|
|
2108
|
+
semantic_html: 0.02,
|
|
2109
|
+
// Clean semantic structure
|
|
2110
|
+
clean_html: 0.02,
|
|
2111
|
+
// Parseable markup
|
|
2112
|
+
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2113
|
+
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2114
|
+
content_cannibalization: 0.02,
|
|
2115
|
+
llms_txt: 0.02,
|
|
2116
|
+
robots_txt: 0.02,
|
|
2117
|
+
content_velocity: 0.02,
|
|
2118
|
+
content_licensing: 0.02,
|
|
2119
|
+
sitemap_completeness: 0.01,
|
|
2120
|
+
canonical_url: 0.01,
|
|
2121
|
+
rss_feed: 0.01,
|
|
2122
|
+
schema_coverage: 0.01,
|
|
2123
|
+
speakable_schema: 0.01
|
|
1831
2124
|
};
|
|
1832
2125
|
function calculateOverallScore(criteria) {
|
|
1833
2126
|
let totalWeight = 0;
|
|
1834
2127
|
let weightedSum = 0;
|
|
1835
2128
|
for (const c of criteria) {
|
|
1836
|
-
const weight = WEIGHTS[c.criterion] ?? 0.
|
|
2129
|
+
const weight = WEIGHTS[c.criterion] ?? 0.05;
|
|
1837
2130
|
weightedSum += c.score / 10 * weight * 100;
|
|
1838
2131
|
totalWeight += weight;
|
|
1839
2132
|
}
|
|
1840
2133
|
if (totalWeight === 0) return 0;
|
|
1841
|
-
|
|
2134
|
+
let score = Math.round(weightedSum / totalWeight);
|
|
2135
|
+
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2136
|
+
if (coherence && coherence.score < 6) {
|
|
2137
|
+
const cap2 = 35 + coherence.score * 5;
|
|
2138
|
+
score = Math.min(score, cap2);
|
|
2139
|
+
}
|
|
2140
|
+
return score;
|
|
1842
2141
|
}
|
|
1843
2142
|
|
|
1844
2143
|
// src/scorecard-builder.ts
|
|
@@ -1868,7 +2167,9 @@ var CRITERION_LABELS = {
|
|
|
1868
2167
|
"Speakable Schema": "Speakable Schema",
|
|
1869
2168
|
"Query-Answer Alignment": "Query-Answer Alignment",
|
|
1870
2169
|
"Content Cannibalization": "Content Cannibalization",
|
|
1871
|
-
"Visible Date Signal": "Visible Date Signal"
|
|
2170
|
+
"Visible Date Signal": "Visible Date Signal",
|
|
2171
|
+
"Topic Coherence": "Topic Coherence",
|
|
2172
|
+
"Content Depth": "Content Depth"
|
|
1872
2173
|
};
|
|
1873
2174
|
function scoreToStatus(score) {
|
|
1874
2175
|
if (score === 0) return "MISSING";
|
|
@@ -1954,32 +2255,37 @@ function buildDetailedFindings(results) {
|
|
|
1954
2255
|
|
|
1955
2256
|
// src/narrative-generator.ts
|
|
1956
2257
|
var CRITERION_WEIGHTS = {
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
qa_content_format: 0.15,
|
|
1960
|
-
clean_html: 0.1,
|
|
1961
|
-
entity_consistency: 0.1,
|
|
1962
|
-
robots_txt: 0.05,
|
|
1963
|
-
faq_section: 0.1,
|
|
2258
|
+
// Content Substance (~55%)
|
|
2259
|
+
topic_coherence: 0.14,
|
|
1964
2260
|
original_data: 0.1,
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
2261
|
+
content_depth: 0.07,
|
|
2262
|
+
fact_density: 0.06,
|
|
2263
|
+
direct_answer_density: 0.05,
|
|
2264
|
+
qa_content_format: 0.05,
|
|
2265
|
+
query_answer_alignment: 0.05,
|
|
2266
|
+
faq_section: 0.04,
|
|
2267
|
+
// Content Organization (~30%)
|
|
2268
|
+
entity_consistency: 0.05,
|
|
2269
|
+
internal_linking: 0.04,
|
|
2270
|
+
content_freshness: 0.04,
|
|
2271
|
+
schema_markup: 0.03,
|
|
2272
|
+
author_schema_depth: 0.03,
|
|
2273
|
+
table_list_extractability: 0.03,
|
|
2274
|
+
definition_patterns: 0.02,
|
|
2275
|
+
visible_date_signal: 0.02,
|
|
2276
|
+
semantic_html: 0.02,
|
|
2277
|
+
clean_html: 0.02,
|
|
2278
|
+
// Technical Plumbing (~15%)
|
|
2279
|
+
content_cannibalization: 0.02,
|
|
2280
|
+
llms_txt: 0.02,
|
|
2281
|
+
robots_txt: 0.02,
|
|
2282
|
+
content_velocity: 0.02,
|
|
2283
|
+
content_licensing: 0.02,
|
|
2284
|
+
sitemap_completeness: 0.01,
|
|
2285
|
+
canonical_url: 0.01,
|
|
2286
|
+
rss_feed: 0.01,
|
|
2287
|
+
schema_coverage: 0.01,
|
|
2288
|
+
speakable_schema: 0.01
|
|
1983
2289
|
};
|
|
1984
2290
|
var OPPORTUNITY_TEMPLATES = {
|
|
1985
2291
|
llms_txt: {
|
|
@@ -2111,6 +2417,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
2111
2417
|
name: "Add Visible Date Signals",
|
|
2112
2418
|
effort: "Low",
|
|
2113
2419
|
description: "Display publication/modification dates visibly using <time> elements and add datePublished/dateModified to JSON-LD schema."
|
|
2420
|
+
},
|
|
2421
|
+
topic_coherence: {
|
|
2422
|
+
name: "Focus Content on Core Topics",
|
|
2423
|
+
effort: "High",
|
|
2424
|
+
description: 'Ensure blog content consistently covers your core expertise areas rather than scattering across unrelated topics. AI engines build authority models - a site about "Medicare coverage" that also publishes about humidifiers and groceries dilutes its topical authority.'
|
|
2425
|
+
},
|
|
2426
|
+
content_depth: {
|
|
2427
|
+
name: "Increase Content Depth",
|
|
2428
|
+
effort: "Medium",
|
|
2429
|
+
description: "Expand articles to 1000+ words with structured H2/H3 sections, comparison tables, and expert analysis. Thin content (under 300 words) is rarely cited by AI engines. Deep, well-structured articles demonstrate expertise."
|
|
2114
2430
|
}
|
|
2115
2431
|
};
|
|
2116
2432
|
function calculateImpact(score, weight, effort) {
|
|
@@ -2232,7 +2548,7 @@ function generatePitchNumbers(score, rawData, scorecard) {
|
|
|
2232
2548
|
const passing = scorecard.filter((s) => s.score >= 7).length;
|
|
2233
2549
|
metrics.push({
|
|
2234
2550
|
metric: "Criteria Passing",
|
|
2235
|
-
value: `${passing}/
|
|
2551
|
+
value: `${passing}/28`,
|
|
2236
2552
|
significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${26 - passing} criteria need attention for full AI visibility`
|
|
2237
2553
|
});
|
|
2238
2554
|
return metrics;
|
|
@@ -2424,20 +2740,23 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
2424
2740
|
|
|
2425
2741
|
// src/page-scorer.ts
|
|
2426
2742
|
var PAGE_CRITERIA = {
|
|
2427
|
-
|
|
2428
|
-
qa_content_format: { weight: 0.15, label: "Q&A Content Format" },
|
|
2429
|
-
clean_html: { weight: 0.1, label: "Clean, Crawlable HTML" },
|
|
2430
|
-
faq_section: { weight: 0.1, label: "FAQ Section Content" },
|
|
2743
|
+
// Content Substance
|
|
2431
2744
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
2432
|
-
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2745
|
+
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
2746
|
+
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
2747
|
+
qa_content_format: { weight: 0.05, label: "Q&A Content Format" },
|
|
2748
|
+
query_answer_alignment: { weight: 0.05, label: "Query-Answer Alignment" },
|
|
2749
|
+
faq_section: { weight: 0.04, label: "FAQ Section Content" },
|
|
2750
|
+
// Content Organization
|
|
2751
|
+
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
2752
|
+
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
2753
|
+
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
2754
|
+
definition_patterns: { weight: 0.02, label: "Definition Patterns" },
|
|
2755
|
+
visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
|
|
2756
|
+
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
2757
|
+
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
2758
|
+
// Technical Plumbing
|
|
2759
|
+
canonical_url: { weight: 0.01, label: "Canonical URL Strategy" }
|
|
2441
2760
|
};
|
|
2442
2761
|
function extractJsonLdBlocks(html) {
|
|
2443
2762
|
const blocks = [];
|
|
@@ -2777,7 +3096,7 @@ function extractTitle(html) {
|
|
|
2777
3096
|
function getTextContent2(html) {
|
|
2778
3097
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
2779
3098
|
}
|
|
2780
|
-
function
|
|
3099
|
+
function countWords2(text) {
|
|
2781
3100
|
if (!text) return 0;
|
|
2782
3101
|
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
2783
3102
|
}
|
|
@@ -2928,7 +3247,7 @@ function checkHasQuestionHeadings(html) {
|
|
|
2928
3247
|
function analyzePage(html, url, category) {
|
|
2929
3248
|
const title = extractTitle(html);
|
|
2930
3249
|
const textContent = getTextContent2(html);
|
|
2931
|
-
const wordCount =
|
|
3250
|
+
const wordCount = countWords2(textContent);
|
|
2932
3251
|
const issues = [];
|
|
2933
3252
|
const strengths = [];
|
|
2934
3253
|
const issueChecks = [
|
|
@@ -2996,7 +3315,7 @@ function extractTitle2(html) {
|
|
|
2996
3315
|
function getTextContent3(html) {
|
|
2997
3316
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
2998
3317
|
}
|
|
2999
|
-
function
|
|
3318
|
+
function countWords3(text) {
|
|
3000
3319
|
if (!text) return 0;
|
|
3001
3320
|
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
3002
3321
|
}
|
|
@@ -3144,7 +3463,7 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
3144
3463
|
if (nodes.has(norm)) continue;
|
|
3145
3464
|
const title = extractTitle2(page.text);
|
|
3146
3465
|
const text = getTextContent3(page.text);
|
|
3147
|
-
const wordCount =
|
|
3466
|
+
const wordCount = countWords3(text);
|
|
3148
3467
|
nodes.set(norm, {
|
|
3149
3468
|
url: norm,
|
|
3150
3469
|
title,
|
|
@@ -3207,32 +3526,37 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
3207
3526
|
|
|
3208
3527
|
// src/fix-engine.ts
|
|
3209
3528
|
var CRITERION_WEIGHTS2 = {
|
|
3210
|
-
|
|
3211
|
-
|
|
3212
|
-
qa_content_format: 0.15,
|
|
3213
|
-
clean_html: 0.1,
|
|
3214
|
-
entity_consistency: 0.1,
|
|
3215
|
-
robots_txt: 0.05,
|
|
3216
|
-
faq_section: 0.1,
|
|
3529
|
+
// Content Substance (~55%)
|
|
3530
|
+
topic_coherence: 0.14,
|
|
3217
3531
|
original_data: 0.1,
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
3226
|
-
|
|
3227
|
-
|
|
3228
|
-
|
|
3229
|
-
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
|
|
3532
|
+
content_depth: 0.07,
|
|
3533
|
+
fact_density: 0.06,
|
|
3534
|
+
direct_answer_density: 0.05,
|
|
3535
|
+
qa_content_format: 0.05,
|
|
3536
|
+
query_answer_alignment: 0.05,
|
|
3537
|
+
faq_section: 0.04,
|
|
3538
|
+
// Content Organization (~30%)
|
|
3539
|
+
entity_consistency: 0.05,
|
|
3540
|
+
internal_linking: 0.04,
|
|
3541
|
+
content_freshness: 0.04,
|
|
3542
|
+
schema_markup: 0.03,
|
|
3543
|
+
author_schema_depth: 0.03,
|
|
3544
|
+
table_list_extractability: 0.03,
|
|
3545
|
+
definition_patterns: 0.02,
|
|
3546
|
+
visible_date_signal: 0.02,
|
|
3547
|
+
semantic_html: 0.02,
|
|
3548
|
+
clean_html: 0.02,
|
|
3549
|
+
// Technical Plumbing (~15%)
|
|
3550
|
+
content_cannibalization: 0.02,
|
|
3551
|
+
llms_txt: 0.02,
|
|
3552
|
+
robots_txt: 0.02,
|
|
3553
|
+
content_velocity: 0.02,
|
|
3554
|
+
content_licensing: 0.02,
|
|
3555
|
+
sitemap_completeness: 0.01,
|
|
3556
|
+
canonical_url: 0.01,
|
|
3557
|
+
rss_feed: 0.01,
|
|
3558
|
+
schema_coverage: 0.01,
|
|
3559
|
+
speakable_schema: 0.01
|
|
3236
3560
|
};
|
|
3237
3561
|
var PHASE_CONFIG = [
|
|
3238
3562
|
{
|
|
@@ -3255,7 +3579,9 @@ var PHASE_CONFIG = [
|
|
|
3255
3579
|
"content_freshness",
|
|
3256
3580
|
"table_list_extractability",
|
|
3257
3581
|
"query_answer_alignment",
|
|
3258
|
-
"visible_date_signal"
|
|
3582
|
+
"visible_date_signal",
|
|
3583
|
+
"topic_coherence",
|
|
3584
|
+
"content_depth"
|
|
3259
3585
|
]
|
|
3260
3586
|
},
|
|
3261
3587
|
{
|
|
@@ -4159,6 +4485,55 @@ Summarization: yes`,
|
|
|
4159
4485
|
affectedPages: affected,
|
|
4160
4486
|
pageCount: affected?.length
|
|
4161
4487
|
}];
|
|
4488
|
+
},
|
|
4489
|
+
topic_coherence: (c) => {
|
|
4490
|
+
if (c.score >= 10) return [];
|
|
4491
|
+
const impact = impactFromScore(c.score);
|
|
4492
|
+
const effort = effortForCriterion("topic_coherence", c.score);
|
|
4493
|
+
return [{
|
|
4494
|
+
id: "fix-topic-coherence",
|
|
4495
|
+
criterion: c.criterion_label,
|
|
4496
|
+
criterionId: c.criterion,
|
|
4497
|
+
title: "Focus blog content on core expertise",
|
|
4498
|
+
description: "Ensure blog content consistently covers your core topic areas. Scattered content across unrelated topics weakens AI engine authority signals.",
|
|
4499
|
+
impact,
|
|
4500
|
+
effort: effort === "trivial" ? "low" : effort,
|
|
4501
|
+
impactScore: 0,
|
|
4502
|
+
category: "content",
|
|
4503
|
+
steps: [
|
|
4504
|
+
"Identify 2-3 core expertise areas your brand is known for",
|
|
4505
|
+
"Audit existing blog posts and remove or consolidate off-topic content",
|
|
4506
|
+
"Create a content calendar focused on core topics",
|
|
4507
|
+
"Use topic clusters: pillar pages linking to supporting articles within the same niche"
|
|
4508
|
+
],
|
|
4509
|
+
successCriteria: "80%+ of blog content covers core expertise areas with consistent topic focus"
|
|
4510
|
+
}];
|
|
4511
|
+
},
|
|
4512
|
+
content_depth: (c, pages) => {
|
|
4513
|
+
if (c.score >= 10) return [];
|
|
4514
|
+
const impact = impactFromScore(c.score);
|
|
4515
|
+
const effort = effortForCriterion("content_depth", c.score);
|
|
4516
|
+
const affected = getAffectedPages("content_depth", pages);
|
|
4517
|
+
return [{
|
|
4518
|
+
id: "fix-content-depth",
|
|
4519
|
+
criterion: c.criterion_label,
|
|
4520
|
+
criterionId: c.criterion,
|
|
4521
|
+
title: "Increase content depth and structure",
|
|
4522
|
+
description: "Expand thin content with more detail, examples, and structured sections. AI engines prefer comprehensive articles with clear heading hierarchies.",
|
|
4523
|
+
impact,
|
|
4524
|
+
effort: effort === "trivial" ? "low" : effort,
|
|
4525
|
+
impactScore: 0,
|
|
4526
|
+
category: "content",
|
|
4527
|
+
steps: [
|
|
4528
|
+
"Aim for 1000+ words per article with expert analysis and examples",
|
|
4529
|
+
"Use H2/H3 subheadings every 200-300 words for clear structure",
|
|
4530
|
+
"Add comparison tables, numbered steps, and data points",
|
|
4531
|
+
"Remove or expand thin pages (under 300 words) that dilute site quality"
|
|
4532
|
+
],
|
|
4533
|
+
successCriteria: "Average article length exceeds 1000 words with 5+ subheadings per page",
|
|
4534
|
+
affectedPages: affected,
|
|
4535
|
+
pageCount: affected?.length
|
|
4536
|
+
}];
|
|
4162
4537
|
}
|
|
4163
4538
|
};
|
|
4164
4539
|
function generateFixPlan(domain, overallScore, criteria, pagesReviewed, linkGraph) {
|
|
@@ -4400,7 +4775,7 @@ async function audit(domain, options) {
|
|
|
4400
4775
|
}
|
|
4401
4776
|
}
|
|
4402
4777
|
if (options?.fullCrawl) {
|
|
4403
|
-
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-
|
|
4778
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-UIOMKOZA.js");
|
|
4404
4779
|
const crawlResult = await crawlFullSite2(siteData, {
|
|
4405
4780
|
maxPages: options.maxPages ?? 200,
|
|
4406
4781
|
concurrency: options.concurrency ?? 5
|