aeorank 1.6.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -39
- package/dist/browser.d.ts +2 -2
- package/dist/browser.js +500 -125
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-3IJISYWT.js → chunk-PKJIKMLV.js} +2 -2
- package/dist/chunk-PKJIKMLV.js.map +1 -0
- package/dist/cli.js +415 -96
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-F7J2HRL4.js → full-site-crawler-FQYO46YV.js} +2 -2
- package/dist/full-site-crawler-FQYO46YV.js.map +1 -0
- package/dist/{full-site-crawler-VFARFR2C.js → full-site-crawler-UIOMKOZA.js} +2 -2
- package/dist/index.cjs +499 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +500 -125
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-3IJISYWT.js.map +0 -1
- package/dist/full-site-crawler-F7J2HRL4.js.map +0 -1
- /package/dist/{full-site-crawler-VFARFR2C.js.map → full-site-crawler-UIOMKOZA.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -303,7 +303,7 @@ var init_full_site_crawler = __esm({
|
|
|
303
303
|
RESOURCE_EXTENSIONS = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|woff|woff2|ttf|eot|mp4|mp3|webp|avif|zip|gz|tar|json)$/i;
|
|
304
304
|
SKIP_PATH_PATTERNS = /^\/(api|wp-admin|wp-json|static|assets|_next|auth|login|signup|cart|checkout|admin|feed|xmlrpc)\b/i;
|
|
305
305
|
CATEGORY_PATTERNS = [
|
|
306
|
-
[/\/(blog|articles?|posts?|news|insights|guides)\b/i, "blog"],
|
|
306
|
+
[/\/([^/]*-?)?(blog|articles?|posts?|news|insights|guides)\b/i, "blog"],
|
|
307
307
|
[/\/(about|about-us|company|who-we-are)\b/i, "about"],
|
|
308
308
|
[/\/(pricing|plans|packages)\b/i, "pricing"],
|
|
309
309
|
[/\/(services?|features?|solutions?|products?|what-we-do|offerings?)\b/i, "services"],
|
|
@@ -552,7 +552,7 @@ async function prefetchSiteData(domain) {
|
|
|
552
552
|
sitemapForBlog = subSitemap.text;
|
|
553
553
|
}
|
|
554
554
|
}
|
|
555
|
-
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain,
|
|
555
|
+
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
556
556
|
if (blogUrls.length > 0) {
|
|
557
557
|
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
558
558
|
blogSample = fetched.filter(
|
|
@@ -909,15 +909,17 @@ function checkOriginalData(data) {
|
|
|
909
909
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
910
910
|
return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
911
911
|
}
|
|
912
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
912
913
|
const html = data.homepage.text;
|
|
913
|
-
const
|
|
914
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
915
|
+
const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
|
|
914
916
|
let score = 0;
|
|
915
917
|
const statPatterns = /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i;
|
|
916
|
-
if (statPatterns.test(
|
|
918
|
+
if (statPatterns.test(allText)) {
|
|
917
919
|
const researchContext = /\b(our\s+(?:study|analysis|research|data|survey|findings|report)|we\s+(?:surveyed|analyzed|studied|measured|tracked)|proprietary|methodology|original\s+research)\b/i;
|
|
918
|
-
if (researchContext.test(
|
|
920
|
+
if (researchContext.test(allText)) {
|
|
919
921
|
score += 3;
|
|
920
|
-
findings.push({ severity: "info", detail: "Proprietary statistics with research context found
|
|
922
|
+
findings.push({ severity: "info", detail: "Proprietary statistics with research context found" });
|
|
921
923
|
} else {
|
|
922
924
|
score += 1;
|
|
923
925
|
findings.push({ severity: "low", detail: 'Statistics found but without research context (e.g., "500+ clients")', fix: 'Add context about your methodology: "Our analysis of X found..." or "We surveyed Y..."' });
|
|
@@ -1432,20 +1434,24 @@ function checkFactDensity(data) {
|
|
|
1432
1434
|
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
1433
1435
|
return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
1434
1436
|
}
|
|
1435
|
-
const
|
|
1437
|
+
const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
|
|
1438
|
+
const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
|
|
1439
|
+
const text = allText;
|
|
1440
|
+
const pageCount = allPages.length;
|
|
1436
1441
|
let score = 0;
|
|
1437
1442
|
const dataPoints = text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD|\s*EUR)/g) || [];
|
|
1438
1443
|
const countPhrases = text.match(/\d+(?:,\d{3})*\+?\s+(?:users?|clients?|customers?|companies|businesses|patients?|members?|employees?|projects?|downloads?)/gi) || [];
|
|
1439
1444
|
const totalDataPoints = dataPoints.length + countPhrases.length;
|
|
1440
|
-
|
|
1445
|
+
const avgPerPage = pageCount > 0 ? totalDataPoints / pageCount : 0;
|
|
1446
|
+
if (avgPerPage >= 4) {
|
|
1441
1447
|
score += 5;
|
|
1442
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found
|
|
1443
|
-
} else if (
|
|
1448
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages (avg ${avgPerPage.toFixed(1)}/page)` });
|
|
1449
|
+
} else if (avgPerPage >= 2) {
|
|
1444
1450
|
score += 3;
|
|
1445
|
-
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found` });
|
|
1451
|
+
findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages` });
|
|
1446
1452
|
} else if (totalDataPoints >= 1) {
|
|
1447
1453
|
score += 1;
|
|
1448
|
-
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1454
|
+
findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found across ${pageCount} pages`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
|
|
1449
1455
|
} else {
|
|
1450
1456
|
findings.push({ severity: "high", detail: "No quantitative data points found", fix: "Add specific statistics (percentages, counts, comparisons) that AI engines can cite" });
|
|
1451
1457
|
}
|
|
@@ -1551,9 +1557,9 @@ function countRecentSitemapDates(sitemapText) {
|
|
|
1551
1557
|
distinctRecentDays: recentDays.size
|
|
1552
1558
|
};
|
|
1553
1559
|
}
|
|
1554
|
-
var BLOG_PATH_PATTERNS = /\/(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1560
|
+
var BLOG_PATH_PATTERNS = /\/(?:[^/]*-?)?(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
|
|
1555
1561
|
var EXCLUDE_PATH_PATTERNS = /\/(?:tag|category|author|page|feed|wp-content|wp-admin|wp-json|cart|checkout|login|search|api|static|assets|_next)\b/i;
|
|
1556
|
-
function extractBlogUrlsFromSitemap(sitemapText, domain, limit =
|
|
1562
|
+
function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
1557
1563
|
const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
|
|
1558
1564
|
const candidates = [];
|
|
1559
1565
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -1849,7 +1855,7 @@ function jaccardSimilarity(a, b) {
|
|
|
1849
1855
|
const union = a.size + b.size - intersection;
|
|
1850
1856
|
return union === 0 ? 0 : intersection / union;
|
|
1851
1857
|
}
|
|
1852
|
-
function checkContentCannibalization(data) {
|
|
1858
|
+
function checkContentCannibalization(data, topicCoherenceScore) {
|
|
1853
1859
|
const findings = [];
|
|
1854
1860
|
if (!data.homepage) {
|
|
1855
1861
|
findings.push({ severity: "critical", detail: "No homepage available for cannibalization analysis" });
|
|
@@ -1859,7 +1865,7 @@ function checkContentCannibalization(data) {
|
|
|
1859
1865
|
{ html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` }
|
|
1860
1866
|
];
|
|
1861
1867
|
if (data.blogSample) {
|
|
1862
|
-
for (const page of data.blogSample
|
|
1868
|
+
for (const page of data.blogSample) {
|
|
1863
1869
|
pages.push({ html: page.text, url: page.finalUrl || "" });
|
|
1864
1870
|
}
|
|
1865
1871
|
}
|
|
@@ -1869,10 +1875,29 @@ function checkContentCannibalization(data) {
|
|
|
1869
1875
|
}
|
|
1870
1876
|
const pageTitles = pages.map((p) => ({ title: extractPageTitle(p.html), url: p.url }));
|
|
1871
1877
|
const wordSets = pageTitles.map((p) => titleToWordSet(p.title));
|
|
1878
|
+
const termPageCount = /* @__PURE__ */ new Map();
|
|
1879
|
+
for (const ws of wordSets) {
|
|
1880
|
+
for (const w of ws) {
|
|
1881
|
+
termPageCount.set(w, (termPageCount.get(w) || 0) + 1);
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
const commonTermThreshold = Math.max(3, pages.length * 0.4);
|
|
1885
|
+
const siteThemeTerms = /* @__PURE__ */ new Set();
|
|
1886
|
+
for (const [term, count] of termPageCount) {
|
|
1887
|
+
if (count >= commonTermThreshold) siteThemeTerms.add(term);
|
|
1888
|
+
}
|
|
1889
|
+
const filteredSets = wordSets.map((ws) => {
|
|
1890
|
+
const filtered = /* @__PURE__ */ new Set();
|
|
1891
|
+
for (const w of ws) {
|
|
1892
|
+
if (!siteThemeTerms.has(w)) filtered.add(w);
|
|
1893
|
+
}
|
|
1894
|
+
return filtered;
|
|
1895
|
+
});
|
|
1872
1896
|
const cannibalPairs = [];
|
|
1873
1897
|
for (let i = 0; i < pages.length; i++) {
|
|
1874
1898
|
for (let j = i + 1; j < pages.length; j++) {
|
|
1875
|
-
|
|
1899
|
+
if (filteredSets[i].size === 0 && filteredSets[j].size === 0) continue;
|
|
1900
|
+
const sim = jaccardSimilarity(filteredSets[i], filteredSets[j]);
|
|
1876
1901
|
if (sim > 0.6) {
|
|
1877
1902
|
cannibalPairs.push({
|
|
1878
1903
|
urlA: pageTitles[i].url.slice(0, 60),
|
|
@@ -1882,23 +1907,39 @@ function checkContentCannibalization(data) {
|
|
|
1882
1907
|
}
|
|
1883
1908
|
}
|
|
1884
1909
|
}
|
|
1910
|
+
const cannibalUrls = /* @__PURE__ */ new Set();
|
|
1911
|
+
for (const pair of cannibalPairs) {
|
|
1912
|
+
cannibalUrls.add(pair.urlA);
|
|
1913
|
+
cannibalUrls.add(pair.urlB);
|
|
1914
|
+
}
|
|
1915
|
+
const cannibalRatio = pages.length > 0 ? cannibalUrls.size / pages.length : 0;
|
|
1885
1916
|
let score;
|
|
1886
1917
|
if (cannibalPairs.length === 0) {
|
|
1887
1918
|
score = 10;
|
|
1888
1919
|
findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no content cannibalization detected` });
|
|
1889
|
-
} else if (
|
|
1890
|
-
score =
|
|
1891
|
-
findings.push({ severity: "
|
|
1892
|
-
} else if (
|
|
1920
|
+
} else if (cannibalRatio <= 0.05) {
|
|
1921
|
+
score = 9;
|
|
1922
|
+
findings.push({ severity: "info", detail: `${cannibalPairs.length} pair(s) of pages with minor topic overlap (${cannibalUrls.size}/${pages.length} pages affected)` });
|
|
1923
|
+
} else if (cannibalRatio <= 0.1) {
|
|
1924
|
+
score = 7;
|
|
1925
|
+
findings.push({ severity: "low", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have overlapping topics`, fix: "Differentiate titles and H1 headings to reduce topic overlap" });
|
|
1926
|
+
} else if (cannibalRatio <= 0.2) {
|
|
1893
1927
|
score = 5;
|
|
1894
|
-
findings.push({ severity: "medium", detail: `${
|
|
1928
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for overlapping topics`, fix: "Consolidate overlapping pages or differentiate their titles and content focus" });
|
|
1929
|
+
} else if (cannibalRatio <= 0.4) {
|
|
1930
|
+
score = 3;
|
|
1931
|
+
findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have significant content overlap`, fix: "Many pages compete for the same topics - consolidate or clearly differentiate them" });
|
|
1895
1932
|
} else {
|
|
1896
1933
|
score = 0;
|
|
1897
|
-
findings.push({ severity: "high", detail: `${
|
|
1934
|
+
findings.push({ severity: "high", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for the same topics`, fix: "Severe content cannibalization - consolidate overlapping pages or create clear topic differentiation" });
|
|
1898
1935
|
}
|
|
1899
1936
|
for (const pair of cannibalPairs.slice(0, 3)) {
|
|
1900
1937
|
findings.push({ severity: "low", detail: `Overlap (${pair.similarity}%): ${pair.urlA} vs ${pair.urlB}` });
|
|
1901
1938
|
}
|
|
1939
|
+
if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && score >= 8) {
|
|
1940
|
+
score = 6;
|
|
1941
|
+
findings.push({ severity: "low", detail: "Low topic overlap but content lacks coherent focus - not a strong signal for AI authority", fix: "Focus content on fewer core topics to build topical authority that AI engines can identify" });
|
|
1942
|
+
}
|
|
1902
1943
|
return { criterion: "content_cannibalization", criterion_label: "Content Cannibalization", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
|
|
1903
1944
|
}
|
|
1904
1945
|
function checkVisibleDateSignal(data) {
|
|
@@ -2124,7 +2165,233 @@ function extractRawDataSummary(data) {
|
|
|
2124
2165
|
crawl_skipped: data.crawlStats?.skipped ?? 0
|
|
2125
2166
|
};
|
|
2126
2167
|
}
|
|
2168
|
+
/**
 * Builds the lowercase "topic text" of a page from its <title> and first <h1>.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string} Title text and H1 text joined by a single space,
 *   lowercased and trimmed. Returns "" when neither element is present or
 *   when `html` is not a string.
 */
function getPageTopicText(html) {
  // A page without a fetched body has no topic signal; report "" instead of
  // throwing on `.match`.
  if (typeof html !== "string") return "";
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  return [
    titleMatch?.[1] || "",
    // The H1 may wrap its text in inline markup (<span>, <em>, ...); drop the tags.
    h1Match?.[1]?.replace(/<[^>]*>/g, "") || ""
  ].join(" ").toLowerCase().trim();
}
|
|
2176
|
+
/**
 * Splits text into topic words and returns every pair of adjacent words.
 *
 * A token is kept when it is longer than two characters, is not in
 * STOP_WORDS, and is not purely numeric.
 *
 * @param {string} text - Text to tokenise (callers pass lowercased topic text).
 * @returns {string[]} Bigrams in reading order, each formatted "first second".
 */
function extractBigrams(text) {
  const isTopicWord = (token) => token.length > 2 && !STOP_WORDS.has(token) && !/^\d+$/.test(token);
  const tokens = text.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter(isTopicWord);
  // Pair each token (from the second onward) with the one before it.
  return tokens.slice(1).map((next, idx) => tokens[idx] + " " + next);
}
|
|
2184
|
+
/**
 * Scores how tightly the sampled blog pages concentrate on one topic.
 *
 * Topic signals come from each page's title + H1 text. The score (0-10) is the
 * sum of a focus component (share of pages containing the most common term or
 * bigram) and a cluster component (number of distinct bigram clusters).
 *
 * @param {object} data - Site data; reads `homepage`, `blogSample` (array of
 *   objects with a `text` HTML string) and `domain`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}}
 */
function checkTopicCoherence(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 0, status: "not_found", findings, fix_priority: "P0" };
  }
  if (!data.blogSample || data.blogSample.length < 3) {
    findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for topic coherence analysis` });
    return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 5, status: "partial", findings, fix_priority: "P2" };
  }
  const blogPages = data.blogSample;

  // Same tokenisation rules throughout: drop short tokens, stop words and pure numbers.
  const tokenize = (text) => text.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
  // Counts, for every distinct item, the number of pages it appears on.
  const countPagesPerItem = (itemsPerPage) => {
    const freq = /* @__PURE__ */ new Map();
    for (const items of itemsPerPage) {
      for (const item of new Set(items)) {
        freq.set(item, (freq.get(item) || 0) + 1);
      }
    }
    return freq;
  };

  // Extract and tokenise each page once; every later step reuses these.
  const pageTitleTexts = blogPages.map((page) => getPageTopicText(page.text));
  const pageTokens = pageTitleTexts.map(tokenize);

  // Brand words: the domain name (minus common TLDs), its hyphen/underscore
  // parts, and any near-ubiquitous term that is a fragment of the domain name.
  const domainBase = data.domain.replace(/^www\./, "").replace(/\.(com|org|net|io|co|ai)$/i, "").toLowerCase();
  const brandWords = /* @__PURE__ */ new Set();
  brandWords.add(domainBase);
  for (const part of domainBase.split(/[-_]/)) {
    if (part.length > 2) brandWords.add(part);
  }
  for (const [term, count] of countPagesPerItem(pageTokens)) {
    if (count / blogPages.length >= 0.8 && domainBase.includes(term)) {
      brandWords.add(term);
    }
  }

  // Term and bigram page-frequencies with brand words excluded.
  const termFreq = countPagesPerItem(pageTokens.map((tokens) => tokens.filter((w) => !brandWords.has(w))));
  const sortedTerms = [...termFreq.entries()].sort((a, b) => b[1] - a[1]);
  const topTerm = sortedTerms[0];
  const bigramFreq = countPagesPerItem(
    pageTitleTexts.map((topicText) => extractBigrams(topicText).filter((bg) => !bg.split(" ").some((w) => brandWords.has(w))))
  );
  const sortedBigrams = [...bigramFreq.entries()].sort((a, b) => b[1] - a[1]);
  const topBigram = sortedBigrams[0];

  // Greedy clustering: each unassigned bigram seen on 2+ pages becomes a
  // cluster root and absorbs every other bigram that shares a whole word with
  // it. Whole-word comparison keeps "car insurance" from absorbing
  // "scar treatment", which a substring test would merge.
  const significantBigrams = sortedBigrams.filter(([, count]) => count >= 2);
  const clusterRoots = [];
  const assigned = /* @__PURE__ */ new Set();
  for (const [bg] of significantBigrams) {
    if (assigned.has(bg)) continue;
    clusterRoots.push(bg);
    assigned.add(bg);
    const rootWords = bg.split(" ");
    for (const [otherBg] of significantBigrams) {
      if (assigned.has(otherBg)) continue;
      if (otherBg.split(" ").some((w) => rootWords.includes(w))) {
        assigned.add(otherBg);
      }
    }
  }
  const topicClusterCount = clusterRoots.length;
  const dominantTerm = topTerm?.[0] || "";
  const dominantTermCount = topTerm?.[1] || 0;
  const focusRatio = blogPages.length > 0 ? dominantTermCount / blogPages.length : 0;
  const dominantBigram = topBigram?.[0] || "";
  const dominantBigramCount = topBigram?.[1] || 0;
  const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;

  // Focus component: 1-7 points from the better of the term and bigram ratios.
  let score = 0;
  const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
  if (bestFocusRatio >= 0.8) {
    score += 7;
  } else if (bestFocusRatio >= 0.6) {
    score += 6;
  } else if (bestFocusRatio >= 0.45) {
    score += 5;
  } else if (bestFocusRatio >= 0.3) {
    score += 3;
  } else if (bestFocusRatio >= 0.15) {
    score += 2;
  } else {
    score += 1;
  }

  // Cluster component: -2 to +3 points. A strong single-term focus (>= 70% of
  // pages) softens the penalty for having many clusters.
  const clusterPenaltyReduced = focusRatio >= 0.7;
  if (topicClusterCount <= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
  } else if (topicClusterCount <= 6) {
    score += clusterPenaltyReduced ? 2 : 1;
    findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
  } else if (topicClusterCount <= 10) {
    score += clusterPenaltyReduced ? 1 : 0;
    if (!clusterPenaltyReduced) {
      findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
    } else {
      findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters but strong core topic focus (${Math.round(focusRatio * 100)}%)` });
    }
  } else {
    score += clusterPenaltyReduced ? 0 : -2;
    if (!clusterPenaltyReduced) {
      findings.push({ severity: "medium", detail: `${topicClusterCount} topic clusters - highly scattered content`, fix: "Content covers too many unrelated topics. AI engines cannot identify your expertise. Focus on your core niche." });
    } else {
      findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters despite strong core topic focus`, fix: "Consider narrowing subtopics within your niche for even stronger AI visibility." });
    }
  }
  score = Math.max(0, Math.min(10, score));
  if (dominantTerm) {
    const focusPct = Math.round(focusRatio * 100);
    findings.push({ severity: "info", detail: `Dominant topic term: "${dominantTerm}" (${focusPct}% of ${blogPages.length} pages)` });
  }
  if (dominantBigram && dominantBigramCount >= 2) {
    findings.push({ severity: "info", detail: `Dominant topic phrase: "${dominantBigram}" (${dominantBigramCount}/${blogPages.length} pages)` });
  }

  // Up to three example titles of pages lacking the dominant term. Membership
  // is checked on tokens so it agrees with how focusRatio was counted.
  const offTopicExamples = [];
  for (let i = 0; i < pageTokens.length && offTopicExamples.length < 3; i++) {
    if (dominantTerm && !pageTokens[i].includes(dominantTerm)) {
      const title = blogPages[i].text.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1]?.trim();
      if (title && title.length > 3) offTopicExamples.push(title.slice(0, 60));
    }
  }
  if (offTopicExamples.length > 0 && score < 8) {
    findings.push({ severity: "low", detail: `Off-topic examples: ${offTopicExamples.join("; ")}` });
  }
  return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P0" };
}
|
|
2319
|
+
/**
 * Counts the whitespace-separated words in the visible text of an HTML page.
 *
 * <script>/<style> elements and HTML comments are removed entirely, and the
 * remaining tags are replaced by spaces before counting.
 *
 * @param {string} html - Raw HTML.
 * @returns {number} Word count; 0 for empty or non-string input.
 */
function countWords(html) {
  if (typeof html !== "string") return 0;
  const text = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
    // Remove comments as a unit: a ">" inside a comment would otherwise end
    // the generic tag match early and leak the rest of the comment as words.
    .replace(/<!--[\s\S]*?-->/g, "")
    .replace(/<[^>]*>/g, " ")
    // A literal U+00A0 already matches \s; make its entity forms split words too.
    .replace(/&nbsp;|&#160;|&#xa0;/gi, " ");
  return text.split(/\s+/).filter((w) => w.length > 0).length;
}
|
|
2323
|
+
/**
 * Counts the opening <h2>-<h6> tags in an HTML string (subheadings; <h1> is
 * excluded).
 *
 * @param {string} html - Raw HTML.
 * @returns {number} Number of subheading opening tags; 0 for non-string input.
 */
function countHeadings(html) {
  if (typeof html !== "string") return 0;
  // The lookahead requires the tag name to end right after the digit, so
  // hyphenated custom elements such as <h2-card> are not counted as headings.
  const headings = html.match(/<h[2-6](?=[\s/>])[^>]*>/gi) || [];
  return headings.length;
}
|
|
2327
|
+
/**
 * Scores how deep and well-structured the sampled blog pages are.
 *
 * Points come from average word count (1-5), average subheading count (0-3)
 * and the share of pages with 1000+ words (0-2); a high share of pages under
 * 300 words subtracts up to 2. The result is capped at 10, and capped at 7
 * when it would be 8+ but the supplied topic-coherence score is 4 or lower.
 *
 * @param {object} data - Site data; reads `blogSample` (objects with `text`).
 * @param {number} [topicCoherenceScore] - Score from the topic-coherence check.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}}
 */
function checkContentDepth(data, topicCoherenceScore) {
  const findings = [];
  if (!data.blogSample || data.blogSample.length < 2) {
    findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for depth analysis` });
    return { criterion: "content_depth", criterion_label: "Content Depth", score: 3, status: "partial", findings, fix_priority: "P2" };
  }
  const blogPages = data.blogSample;
  const mean = (values) => values.reduce((sum, value) => sum + value, 0) / values.length;
  const wordCounts = blogPages.map((page) => countWords(page.text));
  const headingCounts = blogPages.map((page) => countHeadings(page.text));
  const avgWords = mean(wordCounts);
  const avgHeadings = mean(headingCounts);
  const deepPages = wordCounts.filter((count) => count >= 1e3).length;
  const thinPages = wordCounts.filter((count) => count < 300).length;
  const deepRatio = deepPages / blogPages.length;
  const thinRatio = thinPages / blogPages.length;

  // Tiers are ordered from strictest to loosest; the first match wins and the
  // last entry is the fallback when no threshold is met.
  const pickTier = (tiers, value) => tiers.find((tier) => value >= tier.min) ?? tiers[tiers.length - 1];

  const wordTier = pickTier([
    { min: 2e3, points: 5, finding: { severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - excellent depth` } },
    { min: 1200, points: 4, finding: { severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - good depth` } },
    { min: 800, points: 3, finding: { severity: "info", detail: `Average ${Math.round(avgWords)} words per page - moderate depth` } },
    { min: 400, points: 2, finding: { severity: "low", detail: `Average ${Math.round(avgWords)} words per page - shallow content`, fix: "Expand articles with more detail, examples, and expert analysis to build AI citation authority" } },
    { min: -Infinity, points: 1, finding: { severity: "medium", detail: `Average ${Math.round(avgWords)} words per page - very thin content`, fix: "Content is too thin for AI engines to cite. Aim for 1000+ words per article with structured sections." } }
  ], avgWords);
  const headingTier = pickTier([
    { min: 8, points: 3, finding: { severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - well-structured` } },
    { min: 5, points: 2, finding: { severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - decent structure` } },
    { min: 2, points: 1, finding: { severity: "low", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page`, fix: "Add more H2/H3 headings to break content into extractable sections" } },
    { min: -Infinity, points: 0, finding: { severity: "medium", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - minimal structure`, fix: "Add question-format H2/H3 headings so AI engines can extract specific answers" } }
  ], avgHeadings);

  let score = wordTier.points + headingTier.points;
  findings.push(wordTier.finding, headingTier.finding);

  // Bonus for a meaningful share of long-form pages.
  if (deepRatio >= 0.5) {
    score += 2;
    findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages (${Math.round(deepRatio * 100)}%) have 1000+ words` });
  } else if (deepRatio >= 0.25) {
    score += 1;
    findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages have 1000+ words` });
  }

  // Penalty for a meaningful share of thin pages; never drops below zero.
  if (thinRatio >= 0.5) {
    score = Math.max(0, score - 2);
    findings.push({ severity: "medium", detail: `${thinPages}/${blogPages.length} pages (${Math.round(thinRatio * 100)}%) have under 300 words - high thin content ratio`, fix: "Remove or expand thin pages. Thin content dilutes site quality for AI engines." });
  } else if (thinRatio >= 0.25) {
    score = Math.max(0, score - 1);
    findings.push({ severity: "low", detail: `${thinPages}/${blogPages.length} pages have under 300 words` });
  }

  let finalScore = Math.min(10, score);
  const lowCoherence = topicCoherenceScore !== void 0 && topicCoherenceScore <= 4;
  if (lowCoherence && finalScore >= 8) {
    finalScore = 7;
    findings.push({ severity: "low", detail: "Deep content but low topic coherence - depth on scattered topics has reduced AI citation value", fix: "Focus content depth on your core expertise area for maximum AI visibility" });
  }
  const passed = finalScore >= 7;
  return {
    criterion: "content_depth",
    criterion_label: "Content Depth",
    score: finalScore,
    status: passed ? "pass" : finalScore >= 4 ? "partial" : "fail",
    findings,
    fix_priority: passed ? "P3" : "P1"
  };
}
|
|
2127
2392
|
function auditSiteFromData(data) {
|
|
2393
|
+
const topicCoherence = checkTopicCoherence(data);
|
|
2394
|
+
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
2128
2395
|
return [
|
|
2129
2396
|
checkLlmsTxt(data),
|
|
2130
2397
|
checkSchemaMarkup(data),
|
|
@@ -2150,52 +2417,84 @@ function auditSiteFromData(data) {
|
|
|
2150
2417
|
checkSchemaCoverage(data),
|
|
2151
2418
|
checkSpeakableSchema(data),
|
|
2152
2419
|
checkQueryAnswerAlignment(data),
|
|
2153
|
-
|
|
2154
|
-
checkVisibleDateSignal(data)
|
|
2420
|
+
cannibalization,
|
|
2421
|
+
checkVisibleDateSignal(data),
|
|
2422
|
+
topicCoherence,
|
|
2423
|
+
checkContentDepth(data, topicCoherence.score)
|
|
2155
2424
|
];
|
|
2156
2425
|
}
|
|
2157
2426
|
|
|
2158
2427
|
// src/scoring.ts
|
|
2159
2428
|
var WEIGHTS = {
|
|
2160
|
-
//
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
clean_html: 0.1,
|
|
2165
|
-
entity_consistency: 0.1,
|
|
2166
|
-
robots_txt: 0.05,
|
|
2167
|
-
faq_section: 0.1,
|
|
2429
|
+
// ─── Content Substance (~55%) ─────────────────────────────────────────────
|
|
2430
|
+
// WHY an AI engine would cite you. These drive citation quality directly.
|
|
2431
|
+
topic_coherence: 0.14,
|
|
2432
|
+
// Topical authority - THE gating signal
|
|
2168
2433
|
original_data: 0.1,
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
//
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2434
|
+
// Unique value AI can't find elsewhere
|
|
2435
|
+
content_depth: 0.07,
|
|
2436
|
+
// Comprehensive vs thin coverage
|
|
2437
|
+
fact_density: 0.06,
|
|
2438
|
+
// Information density per page
|
|
2439
|
+
direct_answer_density: 0.05,
|
|
2440
|
+
// Direct answers to queries
|
|
2441
|
+
qa_content_format: 0.05,
|
|
2442
|
+
// Answer-shaped content structure
|
|
2443
|
+
query_answer_alignment: 0.05,
|
|
2444
|
+
// Relevance to actual AI queries
|
|
2445
|
+
faq_section: 0.04,
|
|
2446
|
+
// Structured Q&A pairs
|
|
2447
|
+
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2448
|
+
// HOW easily AI engines can extract and trust your content.
|
|
2449
|
+
entity_consistency: 0.05,
|
|
2450
|
+
// Brand authority and E-E-A-T
|
|
2451
|
+
internal_linking: 0.04,
|
|
2452
|
+
// Site structure and topic clusters
|
|
2453
|
+
content_freshness: 0.04,
|
|
2454
|
+
// Recency signals
|
|
2455
|
+
schema_markup: 0.03,
|
|
2456
|
+
// Structured data for discovery
|
|
2457
|
+
author_schema_depth: 0.03,
|
|
2458
|
+
// Expert attribution
|
|
2459
|
+
table_list_extractability: 0.03,
|
|
2460
|
+
// Extractable structured data
|
|
2461
|
+
definition_patterns: 0.02,
|
|
2462
|
+
// Clear definitions
|
|
2463
|
+
visible_date_signal: 0.02,
|
|
2464
|
+
// Publication date trust
|
|
2465
|
+
semantic_html: 0.02,
|
|
2466
|
+
// Clean semantic structure
|
|
2467
|
+
clean_html: 0.02,
|
|
2468
|
+
// Parseable markup
|
|
2469
|
+
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2470
|
+
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2471
|
+
content_cannibalization: 0.02,
|
|
2472
|
+
llms_txt: 0.02,
|
|
2473
|
+
robots_txt: 0.02,
|
|
2474
|
+
content_velocity: 0.02,
|
|
2475
|
+
content_licensing: 0.02,
|
|
2476
|
+
sitemap_completeness: 0.01,
|
|
2477
|
+
canonical_url: 0.01,
|
|
2478
|
+
rss_feed: 0.01,
|
|
2479
|
+
schema_coverage: 0.01,
|
|
2480
|
+
speakable_schema: 0.01
|
|
2188
2481
|
};
|
|
2189
2482
|
/**
 * Collapse per-criterion scores into a single overall score.
 *
 * Each criterion's score is divided by 10, scaled to 0-100 and weighted by
 * its entry in WEIGHTS; criteria without an entry fall back to a weight of
 * 0.05. The result is the weighted mean, rounded to the nearest integer.
 *
 * A weak "topic_coherence" criterion additionally caps the result: when its
 * score is below 6 the overall score cannot exceed 35 + 5 * score
 * (35 for a coherence score of 0, 60 for a coherence score of 5).
 *
 * @param {Array<{criterion: string, score: number}>} criteria - scored criteria.
 * @returns {number} rounded overall score; 0 when nothing usable was supplied.
 */
function calculateOverallScore(criteria) {
  let totalWeight = 0;
  let weightedSum = 0;
  for (const c of criteria) {
    const weight = WEIGHTS[c.criterion] ?? 0.05;
    const contribution = c.score / 10 * weight * 100;
    // A missing or non-numeric score would otherwise turn the whole result
    // into NaN; leave such criteria out of both the sum and the normaliser.
    if (Number.isNaN(contribution)) continue;
    weightedSum += contribution;
    totalWeight += weight;
  }
  if (totalWeight === 0) return 0;
  let score = Math.round(weightedSum / totalWeight);
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
  if (coherence && coherence.score < 6) {
    // Low topical focus limits the score regardless of the other criteria.
    const cap = 35 + coherence.score * 5;
    score = Math.min(score, cap);
  }
  return score;
}
|
|
2200
2499
|
|
|
2201
2500
|
// src/headless-fetch.ts
|
|
@@ -2326,7 +2625,9 @@ var CRITERION_LABELS = {
|
|
|
2326
2625
|
"Speakable Schema": "Speakable Schema",
|
|
2327
2626
|
"Query-Answer Alignment": "Query-Answer Alignment",
|
|
2328
2627
|
"Content Cannibalization": "Content Cannibalization",
|
|
2329
|
-
"Visible Date Signal": "Visible Date Signal"
|
|
2628
|
+
"Visible Date Signal": "Visible Date Signal",
|
|
2629
|
+
"Topic Coherence": "Topic Coherence",
|
|
2630
|
+
"Content Depth": "Content Depth"
|
|
2330
2631
|
};
|
|
2331
2632
|
function scoreToStatus(score) {
|
|
2332
2633
|
if (score === 0) return "MISSING";
|
|
@@ -2412,32 +2713,37 @@ function buildDetailedFindings(results) {
|
|
|
2412
2713
|
|
|
2413
2714
|
// src/narrative-generator.ts
|
|
2414
2715
|
// Criterion id -> relative weight for the narrative generator, assembled from
// three groups. Key order matches the original literal.
var CRITERION_WEIGHTS = Object.assign(
  {},
  // Content Substance (~55%)
  {
    topic_coherence: 0.14,
    original_data: 0.1,
    content_depth: 0.07,
    fact_density: 0.06,
    direct_answer_density: 0.05,
    qa_content_format: 0.05,
    query_answer_alignment: 0.05,
    faq_section: 0.04
  },
  // Content Organization (~30%)
  {
    entity_consistency: 0.05,
    internal_linking: 0.04,
    content_freshness: 0.04,
    schema_markup: 0.03,
    author_schema_depth: 0.03,
    table_list_extractability: 0.03,
    definition_patterns: 0.02,
    visible_date_signal: 0.02,
    semantic_html: 0.02,
    clean_html: 0.02
  },
  // Technical Plumbing (~15%)
  {
    content_cannibalization: 0.02,
    llms_txt: 0.02,
    robots_txt: 0.02,
    content_velocity: 0.02,
    content_licensing: 0.02,
    sitemap_completeness: 0.01,
    canonical_url: 0.01,
    rss_feed: 0.01,
    schema_coverage: 0.01,
    speakable_schema: 0.01
  }
);
|
|
2442
2748
|
var OPPORTUNITY_TEMPLATES = {
|
|
2443
2749
|
llms_txt: {
|
|
@@ -2569,6 +2875,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
2569
2875
|
name: "Add Visible Date Signals",
|
|
2570
2876
|
effort: "Low",
|
|
2571
2877
|
description: "Display publication/modification dates visibly using <time> elements and add datePublished/dateModified to JSON-LD schema."
|
|
2878
|
+
},
|
|
2879
|
+
topic_coherence: {
|
|
2880
|
+
name: "Focus Content on Core Topics",
|
|
2881
|
+
effort: "High",
|
|
2882
|
+
description: 'Ensure blog content consistently covers your core expertise areas rather than scattering across unrelated topics. AI engines build authority models - a site about "Medicare coverage" that also publishes about humidifiers and groceries dilutes its topical authority.'
|
|
2883
|
+
},
|
|
2884
|
+
content_depth: {
|
|
2885
|
+
name: "Increase Content Depth",
|
|
2886
|
+
effort: "Medium",
|
|
2887
|
+
description: "Expand articles to 1000+ words with structured H2/H3 sections, comparison tables, and expert analysis. Thin content (under 300 words) is rarely cited by AI engines. Deep, well-structured articles demonstrate expertise."
|
|
2572
2888
|
}
|
|
2573
2889
|
};
|
|
2574
2890
|
function calculateImpact(score, weight, effort) {
|
|
@@ -2690,7 +3006,7 @@ function generatePitchNumbers(score, rawData, scorecard) {
|
|
|
2690
3006
|
const passing = scorecard.filter((s) => s.score >= 7).length;
|
|
2691
3007
|
// Summarise how many criteria score 7 or higher, out of 28.
metrics.push({
  metric: "Criteria Passing",
  value: `${passing}/28`,
  // The remaining-criteria count uses the same total (28) as `value`; it
  // previously subtracted from 26 and under-reported by two.
  significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${28 - passing} criteria need attention for full AI visibility`
});
|
|
2696
3012
|
return metrics;
|
|
@@ -2882,20 +3198,23 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
2882
3198
|
|
|
2883
3199
|
// src/page-scorer.ts
|
|
2884
3200
|
// Per-page criteria: criterion id -> { weight, label }.
// Written as [id, weight, label] rows and expanded into the lookup object;
// row order is the key order of the original literal.
var PAGE_CRITERIA = Object.fromEntries(
  [
    // Content Substance
    ["original_data", 0.1, "Original Data & Expert Content"],
    ["fact_density", 0.06, "Fact & Data Density"],
    ["direct_answer_density", 0.05, "Direct Answer Paragraphs"],
    ["qa_content_format", 0.05, "Q&A Content Format"],
    ["query_answer_alignment", 0.05, "Query-Answer Alignment"],
    ["faq_section", 0.04, "FAQ Section Content"],
    // Content Organization
    ["content_freshness", 0.04, "Content Freshness Signals"],
    ["schema_markup", 0.03, "Schema.org Structured Data"],
    ["table_list_extractability", 0.03, "Table & List Extractability"],
    ["definition_patterns", 0.02, "Definition Patterns"],
    ["visible_date_signal", 0.02, "Visible Date Signal"],
    ["semantic_html", 0.02, "Semantic HTML5 & Accessibility"],
    ["clean_html", 0.02, "Clean, Crawlable HTML"],
    // Technical Plumbing
    ["canonical_url", 0.01, "Canonical URL Strategy"]
  ].map(([id, weight, label]) => [id, { weight, label }])
);
|
|
2900
3219
|
function extractJsonLdBlocks(html) {
|
|
2901
3220
|
const blocks = [];
|
|
@@ -3235,7 +3554,7 @@ function extractTitle(html) {
|
|
|
3235
3554
|
/**
 * Reduce an HTML string to its visible text.
 *
 * HTML comments and whole <script>/<style> elements are removed first, in a
 * single left-to-right pass, so commented-out markup and inline code never
 * reach the output. Closing tags written with whitespace before ">"
 * (e.g. "</script >") are recognised as well. Remaining tags become spaces,
 * whitespace runs collapse to one space, and the result is trimmed.
 * HTML entities are left undecoded.
 *
 * @param {string} html - raw markup.
 * @returns {string} plain text; "" for empty or non-string input.
 */
function getTextContent2(html) {
  if (typeof html !== "string" || html === "") return "";
  return html
    .replace(/<!--[\s\S]*?-->|<script\b[\s\S]*?<\/script\s*>|<style\b[\s\S]*?<\/style\s*>/gi, "")
    .replace(/<[^>]*>/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
3238
|
-
function
|
|
3557
|
+
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - text to measure; falsy input counts as 0 words.
 * @returns {number} number of non-empty tokens after splitting on whitespace.
 */
function countWords2(text) {
  if (!text) {
    return 0;
  }
  let total = 0;
  for (const token of text.split(/\s+/)) {
    // Leading or trailing whitespace yields empty tokens; ignore them.
    if (token.length > 0) {
      total += 1;
    }
  }
  return total;
}
|
|
@@ -3386,7 +3705,7 @@ function checkHasQuestionHeadings(html) {
|
|
|
3386
3705
|
function analyzePage(html, url, category) {
|
|
3387
3706
|
const title = extractTitle(html);
|
|
3388
3707
|
const textContent = getTextContent2(html);
|
|
3389
|
-
const wordCount =
|
|
3708
|
+
const wordCount = countWords2(textContent);
|
|
3390
3709
|
const issues = [];
|
|
3391
3710
|
const strengths = [];
|
|
3392
3711
|
const issueChecks = [
|
|
@@ -3539,7 +3858,7 @@ function extractTitle2(html) {
|
|
|
3539
3858
|
/**
 * Reduce an HTML string to its visible text.
 *
 * HTML comments and whole <script>/<style> elements are removed first, in a
 * single left-to-right pass, so commented-out markup and inline code never
 * reach the output. Closing tags written with whitespace before ">"
 * (e.g. "</style >") are recognised as well. Remaining tags become spaces,
 * whitespace runs collapse to one space, and the result is trimmed.
 * HTML entities are left undecoded.
 *
 * @param {string} html - raw markup.
 * @returns {string} plain text; "" for empty or non-string input.
 */
function getTextContent3(html) {
  if (typeof html !== "string" || html === "") return "";
  return html
    .replace(/<!--[\s\S]*?-->|<script\b[\s\S]*?<\/script\s*>|<style\b[\s\S]*?<\/style\s*>/gi, "")
    .replace(/<[^>]*>/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
3542
|
-
function
|
|
3861
|
+
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - text to measure; falsy input counts as 0 words.
 * @returns {number} number of non-empty tokens after splitting on whitespace.
 */
function countWords3(text) {
  if (!text) {
    return 0;
  }
  const tokens = text.split(/\s+/);
  // Leading or trailing whitespace yields empty tokens; drop them before counting.
  const words = tokens.filter((token) => token.length > 0);
  return words.length;
}
|
|
@@ -3687,7 +4006,7 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
3687
4006
|
if (nodes.has(norm)) continue;
|
|
3688
4007
|
const title = extractTitle2(page.text);
|
|
3689
4008
|
const text = getTextContent3(page.text);
|
|
3690
|
-
const wordCount =
|
|
4009
|
+
const wordCount = countWords3(text);
|
|
3691
4010
|
nodes.set(norm, {
|
|
3692
4011
|
url: norm,
|
|
3693
4012
|
title,
|
|
@@ -3750,32 +4069,37 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
3750
4069
|
|
|
3751
4070
|
// src/fix-engine.ts
|
|
3752
4071
|
// Criterion id -> relative weight for the fix engine, assembled from three
// groups. Key order matches the original literal.
var CRITERION_WEIGHTS2 = Object.assign(
  {},
  // Content Substance (~55%)
  {
    topic_coherence: 0.14,
    original_data: 0.1,
    content_depth: 0.07,
    fact_density: 0.06,
    direct_answer_density: 0.05,
    qa_content_format: 0.05,
    query_answer_alignment: 0.05,
    faq_section: 0.04
  },
  // Content Organization (~30%)
  {
    entity_consistency: 0.05,
    internal_linking: 0.04,
    content_freshness: 0.04,
    schema_markup: 0.03,
    author_schema_depth: 0.03,
    table_list_extractability: 0.03,
    definition_patterns: 0.02,
    visible_date_signal: 0.02,
    semantic_html: 0.02,
    clean_html: 0.02
  },
  // Technical Plumbing (~15%)
  {
    content_cannibalization: 0.02,
    llms_txt: 0.02,
    robots_txt: 0.02,
    content_velocity: 0.02,
    content_licensing: 0.02,
    sitemap_completeness: 0.01,
    canonical_url: 0.01,
    rss_feed: 0.01,
    schema_coverage: 0.01,
    speakable_schema: 0.01
  }
);
|
|
3780
4104
|
var PHASE_CONFIG = [
|
|
3781
4105
|
{
|
|
@@ -3798,7 +4122,9 @@ var PHASE_CONFIG = [
|
|
|
3798
4122
|
"content_freshness",
|
|
3799
4123
|
"table_list_extractability",
|
|
3800
4124
|
"query_answer_alignment",
|
|
3801
|
-
"visible_date_signal"
|
|
4125
|
+
"visible_date_signal",
|
|
4126
|
+
"topic_coherence",
|
|
4127
|
+
"content_depth"
|
|
3802
4128
|
]
|
|
3803
4129
|
},
|
|
3804
4130
|
{
|
|
@@ -4702,6 +5028,55 @@ Summarization: yes`,
|
|
|
4702
5028
|
affectedPages: affected,
|
|
4703
5029
|
pageCount: affected?.length
|
|
4704
5030
|
}];
|
|
5031
|
+
},
|
|
5032
|
+
topic_coherence: (c) => {
|
|
5033
|
+
if (c.score >= 10) return [];
|
|
5034
|
+
const impact = impactFromScore(c.score);
|
|
5035
|
+
const effort = effortForCriterion("topic_coherence", c.score);
|
|
5036
|
+
return [{
|
|
5037
|
+
id: "fix-topic-coherence",
|
|
5038
|
+
criterion: c.criterion_label,
|
|
5039
|
+
criterionId: c.criterion,
|
|
5040
|
+
title: "Focus blog content on core expertise",
|
|
5041
|
+
description: "Ensure blog content consistently covers your core topic areas. Scattered content across unrelated topics weakens AI engine authority signals.",
|
|
5042
|
+
impact,
|
|
5043
|
+
effort: effort === "trivial" ? "low" : effort,
|
|
5044
|
+
impactScore: 0,
|
|
5045
|
+
category: "content",
|
|
5046
|
+
steps: [
|
|
5047
|
+
"Identify 2-3 core expertise areas your brand is known for",
|
|
5048
|
+
"Audit existing blog posts and remove or consolidate off-topic content",
|
|
5049
|
+
"Create a content calendar focused on core topics",
|
|
5050
|
+
"Use topic clusters: pillar pages linking to supporting articles within the same niche"
|
|
5051
|
+
],
|
|
5052
|
+
successCriteria: "80%+ of blog content covers core expertise areas with consistent topic focus"
|
|
5053
|
+
}];
|
|
5054
|
+
},
|
|
5055
|
+
// Fix generator for the "content_depth" criterion.
// Returns no fixes once the criterion scores 10 or more; otherwise returns a
// single content fix, annotated with the pages reported by getAffectedPages.
content_depth: (c, pages) => {
  if (c.score >= 10) {
    return [];
  }
  const severity = impactFromScore(c.score);
  const estimatedEffort = effortForCriterion("content_depth", c.score);
  const thinPages = getAffectedPages("content_depth", pages);
  // This fix never reports "trivial" effort; that level is raised to "low".
  let effortLevel = estimatedEffort;
  if (estimatedEffort === "trivial") {
    effortLevel = "low";
  }
  const fix = {
    id: "fix-content-depth",
    criterion: c.criterion_label,
    criterionId: c.criterion,
    title: "Increase content depth and structure",
    description: "Expand thin content with more detail, examples, and structured sections. AI engines prefer comprehensive articles with clear heading hierarchies.",
    impact: severity,
    effort: effortLevel,
    // NOTE(review): emitted as 0 here; presumably filled in by the plan
    // generator afterwards - confirm against the caller.
    impactScore: 0,
    category: "content",
    steps: [
      "Aim for 1000+ words per article with expert analysis and examples",
      "Use H2/H3 subheadings every 200-300 words for clear structure",
      "Add comparison tables, numbered steps, and data points",
      "Remove or expand thin pages (under 300 words) that dilute site quality"
    ],
    successCriteria: "Average article length exceeds 1000 words with 5+ subheadings per page",
    affectedPages: thinPages,
    // Stays undefined when getAffectedPages returns null/undefined.
    pageCount: thinPages?.length
  };
  return [fix];
}
|
|
4706
5081
|
};
|
|
4707
5082
|
function generateFixPlan(domain, overallScore, criteria, pagesReviewed, linkGraph) {
|