npm - aeorank - Versions diffs - 1.6.0 → 2.1.0 - Mend

aeorank 1.6.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +110 -39
package/dist/browser.d.ts +2 -2
package/dist/browser.js +500 -125
package/dist/browser.js.map +1 -1
package/dist/{chunk-3IJISYWT.js → chunk-PKJIKMLV.js} +2 -2
package/dist/chunk-PKJIKMLV.js.map +1 -0
package/dist/cli.js +415 -96
package/dist/cli.js.map +1 -1
package/dist/{full-site-crawler-F7J2HRL4.js → full-site-crawler-FQYO46YV.js} +2 -2
package/dist/full-site-crawler-FQYO46YV.js.map +1 -0
package/dist/{full-site-crawler-VFARFR2C.js → full-site-crawler-UIOMKOZA.js} +2 -2
package/dist/index.cjs +499 -124
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +500 -125
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-3IJISYWT.js.map +0 -1
package/dist/full-site-crawler-F7J2HRL4.js.map +0 -1
package/dist/{full-site-crawler-VFARFR2C.js.map → full-site-crawler-UIOMKOZA.js.map} +0 -0

package/dist/browser.js CHANGED Viewed

@@ -3,7 +3,7 @@ import {
   extractAllUrlsFromSitemap,
   extractInternalLinks,
   inferCategory
-} from "./chunk-3IJISYWT.js";
+} from "./chunk-PKJIKMLV.js";
 // src/parked-domain.ts
 var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
@@ -195,7 +195,7 @@ async function prefetchSiteData(domain) {
         sitemapForBlog = subSitemap.text;
       }
     }
-    const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 10);
+    const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
     if (blogUrls.length > 0) {
       const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
       blogSample = fetched.filter(
@@ -552,15 +552,17 @@ function checkOriginalData(data) {
     findings.push({ severity: "critical", detail: "Could not fetch homepage" });
     return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: 0, status: "not_found", findings, fix_priority: "P2" };
   }
+  const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
   const html = data.homepage.text;
-  const text = html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
+  const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
+  const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
   let score = 0;
   const statPatterns = /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i;
-  if (statPatterns.test(text)) {
+  if (statPatterns.test(allText)) {
     const researchContext = /\b(our\s+(?:study|analysis|research|data|survey|findings|report)|we\s+(?:surveyed|analyzed|studied|measured|tracked)|proprietary|methodology|original\s+research)\b/i;
-    if (researchContext.test(text)) {
+    if (researchContext.test(allText)) {
       score += 3;
-      findings.push({ severity: "info", detail: "Proprietary statistics with research context found on homepage" });
+      findings.push({ severity: "info", detail: "Proprietary statistics with research context found" });
     } else {
       score += 1;
       findings.push({ severity: "low", detail: 'Statistics found but without research context (e.g., "500+ clients")', fix: 'Add context about your methodology: "Our analysis of X found..." or "We surveyed Y..."' });
@@ -1075,20 +1077,24 @@ function checkFactDensity(data) {
     findings.push({ severity: "critical", detail: "Could not fetch homepage" });
     return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: 0, status: "not_found", findings, fix_priority: "P2" };
   }
-  const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
+  const allPages = [data.homepage, ...data.blogSample || []].filter(Boolean);
+  const allText = allPages.map((p) => p.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ")).join(" ");
+  const text = allText;
+  const pageCount = allPages.length;
   let score = 0;
   const dataPoints = text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD|\s*EUR)/g) || [];
   const countPhrases = text.match(/\d+(?:,\d{3})*\+?\s+(?:users?|clients?|customers?|companies|businesses|patients?|members?|employees?|projects?|downloads?)/gi) || [];
   const totalDataPoints = dataPoints.length + countPhrases.length;
-  if (totalDataPoints >= 6) {
+  const avgPerPage = pageCount > 0 ? totalDataPoints / pageCount : 0;
+  if (avgPerPage >= 4) {
     score += 5;
-    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found on homepage` });
-  } else if (totalDataPoints >= 3) {
+    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages (avg ${avgPerPage.toFixed(1)}/page)` });
+  } else if (avgPerPage >= 2) {
     score += 3;
-    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found` });
+    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found across ${pageCount} pages` });
   } else if (totalDataPoints >= 1) {
     score += 1;
-    findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
+    findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found across ${pageCount} pages`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
   } else {
     findings.push({ severity: "high", detail: "No quantitative data points found", fix: "Add specific statistics (percentages, counts, comparisons) that AI engines can cite" });
   }
@@ -1194,9 +1200,9 @@ function countRecentSitemapDates(sitemapText) {
     distinctRecentDays: recentDays.size
   };
 }
-var BLOG_PATH_PATTERNS = /\/(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
+var BLOG_PATH_PATTERNS = /\/(?:[^/]*-?)?(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud|whitepapers?)\b/i;
 var EXCLUDE_PATH_PATTERNS = /\/(?:tag|category|author|page|feed|wp-content|wp-admin|wp-json|cart|checkout|login|search|api|static|assets|_next)\b/i;
-function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 5) {
+function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
   const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
   const candidates = [];
   const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
@@ -1492,7 +1498,7 @@ function jaccardSimilarity(a, b) {
   const union = a.size + b.size - intersection;
   return union === 0 ? 0 : intersection / union;
 }
-function checkContentCannibalization(data) {
+function checkContentCannibalization(data, topicCoherenceScore) {
   const findings = [];
   if (!data.homepage) {
     findings.push({ severity: "critical", detail: "No homepage available for cannibalization analysis" });
@@ -1502,7 +1508,7 @@ function checkContentCannibalization(data) {
     { html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` }
   ];
   if (data.blogSample) {
-    for (const page of data.blogSample.slice(0, 5)) {
+    for (const page of data.blogSample) {
       pages.push({ html: page.text, url: page.finalUrl || "" });
     }
   }
@@ -1512,10 +1518,29 @@ function checkContentCannibalization(data) {
   }
   const pageTitles = pages.map((p) => ({ title: extractPageTitle(p.html), url: p.url }));
   const wordSets = pageTitles.map((p) => titleToWordSet(p.title));
+  const termPageCount = /* @__PURE__ */ new Map();
+  for (const ws of wordSets) {
+    for (const w of ws) {
+      termPageCount.set(w, (termPageCount.get(w) || 0) + 1);
+    }
+  }
+  const commonTermThreshold = Math.max(3, pages.length * 0.4);
+  const siteThemeTerms = /* @__PURE__ */ new Set();
+  for (const [term, count] of termPageCount) {
+    if (count >= commonTermThreshold) siteThemeTerms.add(term);
+  }
+  const filteredSets = wordSets.map((ws) => {
+    const filtered = /* @__PURE__ */ new Set();
+    for (const w of ws) {
+      if (!siteThemeTerms.has(w)) filtered.add(w);
+    }
+    return filtered;
+  });
   const cannibalPairs = [];
   for (let i = 0; i < pages.length; i++) {
     for (let j = i + 1; j < pages.length; j++) {
-      const sim = jaccardSimilarity(wordSets[i], wordSets[j]);
+      if (filteredSets[i].size === 0 && filteredSets[j].size === 0) continue;
+      const sim = jaccardSimilarity(filteredSets[i], filteredSets[j]);
       if (sim > 0.6) {
         cannibalPairs.push({
           urlA: pageTitles[i].url.slice(0, 60),
@@ -1525,23 +1550,39 @@ function checkContentCannibalization(data) {
       }
     }
   }
+  const cannibalUrls = /* @__PURE__ */ new Set();
+  for (const pair of cannibalPairs) {
+    cannibalUrls.add(pair.urlA);
+    cannibalUrls.add(pair.urlB);
+  }
+  const cannibalRatio = pages.length > 0 ? cannibalUrls.size / pages.length : 0;
   let score;
   if (cannibalPairs.length === 0) {
     score = 10;
     findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no content cannibalization detected` });
-  } else if (cannibalPairs.length === 1) {
-    score = 8;
-    findings.push({ severity: "low", detail: `1 pair of pages with overlapping topics (${cannibalPairs[0].similarity}% similarity)`, fix: "Differentiate titles and H1 headings to reduce topic overlap" });
-  } else if (cannibalPairs.length === 2) {
+  } else if (cannibalRatio <= 0.05) {
+    score = 9;
+    findings.push({ severity: "info", detail: `${cannibalPairs.length} pair(s) of pages with minor topic overlap (${cannibalUrls.size}/${pages.length} pages affected)` });
+  } else if (cannibalRatio <= 0.1) {
+    score = 7;
+    findings.push({ severity: "low", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have overlapping topics`, fix: "Differentiate titles and H1 headings to reduce topic overlap" });
+  } else if (cannibalRatio <= 0.2) {
     score = 5;
-    findings.push({ severity: "medium", detail: `${cannibalPairs.length} pairs of pages with overlapping topics`, fix: "Consolidate overlapping pages or differentiate their titles and content focus" });
+    findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for overlapping topics`, fix: "Consolidate overlapping pages or differentiate their titles and content focus" });
+  } else if (cannibalRatio <= 0.4) {
+    score = 3;
+    findings.push({ severity: "medium", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) have significant content overlap`, fix: "Many pages compete for the same topics - consolidate or clearly differentiate them" });
   } else {
     score = 0;
-    findings.push({ severity: "high", detail: `${cannibalPairs.length} pairs of pages competing for the same topics`, fix: "Significant content overlap detected - consolidate or clearly differentiate competing pages" });
+    findings.push({ severity: "high", detail: `${cannibalUrls.size} pages (${Math.round(cannibalRatio * 100)}%) competing for the same topics`, fix: "Severe content cannibalization - consolidate overlapping pages or create clear topic differentiation" });
   }
   for (const pair of cannibalPairs.slice(0, 3)) {
     findings.push({ severity: "low", detail: `Overlap (${pair.similarity}%): ${pair.urlA} vs ${pair.urlB}` });
   }
+  if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && score >= 8) {
+    score = 6;
+    findings.push({ severity: "low", detail: "Low topic overlap but content lacks coherent focus - not a strong signal for AI authority", fix: "Focus content on fewer core topics to build topical authority that AI engines can identify" });
+  }
   return { criterion: "content_cannibalization", criterion_label: "Content Cannibalization", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
 }
 function checkVisibleDateSignal(data) {
@@ -1767,7 +1808,233 @@ function extractRawDataSummary(data) {
     crawl_skipped: data.crawlStats?.skipped ?? 0
   };
 }
+function getPageTopicText(html) { // Returns the <title> text plus the first <h1> text found in html, joined by a space, lowercased and trimmed.
+  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
+  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i); // non-global match, so only the first <h1> is captured
+  return [
+    titleMatch?.[1] || "", // a missing title contributes an empty string
+    h1Match?.[1]?.replace(/<[^>]*>/g, "") || "" // tags nested inside the heading are removed
+  ].join(" ").toLowerCase().trim();
+}
+function extractBigrams(text) { // Splits text on whitespace and punctuation, keeps words longer than 2 chars that are not in STOP_WORDS (defined outside this hunk) and not all digits, then returns each pair of neighbouring kept words as one space-joined string (duplicates included).
+  const words = text.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
+  const bigrams = [];
+  for (let i = 0; i < words.length - 1; i++) {
+    bigrams.push(words[i] + " " + words[i + 1]);
+  }
+  return bigrams;
+}
+function checkTopicCoherence(data) { // Scores 0-10 how strongly the title/H1 text of the data.blogSample pages concentrates on one term or phrase; returns a topic_coherence criterion result object.
+  const findings = [];
+  if (!data.homepage) { // no homepage: score 0 with status not_found
+    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
+    return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 0, status: "not_found", findings, fix_priority: "P0" };
+  }
+  if (!data.blogSample || data.blogSample.length < 3) { // fewer than 3 blog pages: fixed score 5 instead of an analysis
+    findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for topic coherence analysis` });
+    return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score: 5, status: "partial", findings, fix_priority: "P2" };
+  }
+  const blogPages = data.blogSample;
+  const domainBase = data.domain.replace(/^www\./, "").replace(/\.(com|org|net|io|co|ai)$/i, "").toLowerCase(); // domain minus a leading www. and one of the listed TLD suffixes
+  const brandWords = /* @__PURE__ */ new Set(); // words in this set are excluded from the term and bigram counts below
+  brandWords.add(domainBase);
+  for (const part of domainBase.split(/[-_]/)) {
+    if (part.length > 2) brandWords.add(part);
+  }
+  const rawTermFreq = /* @__PURE__ */ new Map(); // term -> number of pages whose title/H1 text contains it, before brand filtering
+  const pageTitleTexts = [];
+  for (const page of blogPages) {
+    const topicText = getPageTopicText(page.text);
+    pageTitleTexts.push(topicText);
+    const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w));
+    const uniqueWords = new Set(words);
+    for (const w of uniqueWords) {
+      rawTermFreq.set(w, (rawTermFreq.get(w) || 0) + 1);
+    }
+  }
+  for (const [term, count] of rawTermFreq) {
+    if (count / blogPages.length >= 0.8 && domainBase.includes(term)) {
+      brandWords.add(term);
+    }
+  }
+  const termFreq = /* @__PURE__ */ new Map();
+  for (const page of blogPages) {
+    const topicText = getPageTopicText(page.text);
+    const words = topicText.split(/[\s,.!?;:()\[\]{}"'\/&]+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w) && !/^\d+$/.test(w) && !brandWords.has(w));
+    const uniqueWords = new Set(words);
+    for (const w of uniqueWords) {
+      termFreq.set(w, (termFreq.get(w) || 0) + 1);
+    }
+  }
+  const sortedTerms = [...termFreq.entries()].sort((a, b) => b[1] - a[1]); // descending by page count
+  const topTerm = sortedTerms[0];
+  const bigramFreq = /* @__PURE__ */ new Map();
+  const pageBigrams = [];
+  for (const topicText of pageTitleTexts) {
+    const bigrams = extractBigrams(topicText).filter((bg) => !bg.split(" ").some((w) => brandWords.has(w)));
+    pageBigrams.push(bigrams);
+    const uniqueBigrams = new Set(bigrams);
+    for (const bg of uniqueBigrams) {
+      bigramFreq.set(bg, (bigramFreq.get(bg) || 0) + 1);
+    }
+  }
+  const sortedBigrams = [...bigramFreq.entries()].sort((a, b) => b[1] - a[1]);
+  const topBigram = sortedBigrams[0];
+  const significantBigrams = sortedBigrams.filter(([, count]) => count >= 2); // bigrams that appear on at least 2 pages
+  const clusterRoots = []; // each still-unassigned bigram becomes a root and absorbs the remaining bigrams that match either of its words
+  const assigned = /* @__PURE__ */ new Set();
+  for (const [bg] of significantBigrams) {
+    if (assigned.has(bg)) continue;
+    clusterRoots.push(bg);
+    assigned.add(bg);
+    const [w1, w2] = bg.split(" ");
+    for (const [otherBg] of significantBigrams) {
+      if (assigned.has(otherBg)) continue;
+      if (otherBg.includes(w1) || otherBg.includes(w2)) { // substring test on the whole bigram string, not a whole-word comparison
+        assigned.add(otherBg);
+      }
+    }
+  }
+  const topicClusterCount = clusterRoots.length;
+  const dominantTerm = topTerm?.[0] || "";
+  const dominantTermCount = topTerm?.[1] || 0;
+  const focusRatio = blogPages.length > 0 ? dominantTermCount / blogPages.length : 0; // share of blog pages containing the most common term
+  const dominantBigram = topBigram?.[0] || "";
+  const dominantBigramCount = topBigram?.[1] || 0;
+  const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
+  let score = 0;
+  const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio); // the larger of term focus and bigram focus sets the base score of 1 to 7
+  if (bestFocusRatio >= 0.8) {
+    score += 7;
+  } else if (bestFocusRatio >= 0.6) {
+    score += 6;
+  } else if (bestFocusRatio >= 0.45) {
+    score += 5;
+  } else if (bestFocusRatio >= 0.3) {
+    score += 3;
+  } else if (bestFocusRatio >= 0.15) {
+    score += 2;
+  } else {
+    score += 1;
+  }
+  const clusterPenaltyReduced = focusRatio >= 0.7; // based on the single-term ratio only; improves the cluster adjustment in every branch above 3 clusters
+  if (topicClusterCount <= 3) {
+    score += 3;
+    findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
+  } else if (topicClusterCount <= 6) {
+    score += clusterPenaltyReduced ? 2 : 1;
+    findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
+  } else if (topicClusterCount <= 10) {
+    score += clusterPenaltyReduced ? 1 : 0;
+    if (!clusterPenaltyReduced) {
+      findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
+    } else {
+      findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters but strong core topic focus (${Math.round(focusRatio * 100)}%)` });
+    }
+  } else {
+    score += clusterPenaltyReduced ? 0 : -2; // more than 10 clusters: the only branch that can subtract points
+    if (!clusterPenaltyReduced) {
+      findings.push({ severity: "medium", detail: `${topicClusterCount} topic clusters - highly scattered content`, fix: "Content covers too many unrelated topics. AI engines cannot identify your expertise. Focus on your core niche." });
+    } else {
+      findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters despite strong core topic focus`, fix: "Consider narrowing subtopics within your niche for even stronger AI visibility." });
+    }
+  }
+  score = Math.max(0, Math.min(10, score)); // clamp to the 0-10 range
+  if (dominantTerm) {
+    const focusPct = Math.round(focusRatio * 100);
+    findings.push({ severity: "info", detail: `Dominant topic term: "${dominantTerm}" (${focusPct}% of ${blogPages.length} pages)` });
+  }
+  if (dominantBigram && dominantBigramCount >= 2) {
+    findings.push({ severity: "info", detail: `Dominant topic phrase: "${dominantBigram}" (${dominantBigramCount}/${blogPages.length} pages)` });
+  }
+  const offTopicExamples = []; // at most 3 titles, enforced by the loop condition below
+  for (let i = 0; i < pageTitleTexts.length && offTopicExamples.length < 3; i++) {
+    if (dominantTerm && !pageTitleTexts[i].includes(dominantTerm)) { // substring test: a page counts as off-topic when its title/H1 text lacks the dominant term
+      const title = blogPages[i].text.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1]?.trim();
+      if (title && title.length > 3) offTopicExamples.push(title.slice(0, 60)); // titles are truncated to 60 characters
+    }
+  }
+  if (offTopicExamples.length > 0 && score < 8) { // examples are reported only when the score is below 8
+    findings.push({ severity: "low", detail: `Off-topic examples: ${offTopicExamples.join("; ")}` });
+  }
+  return { criterion: "topic_coherence", criterion_label: "Topic Coherence", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P0" };
+}
+function countWords(html) { // Counts the whitespace-separated tokens left after removing <script> and <style> elements and all remaining tags from html.
+  const text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
+  return text.split(/\s+/).filter((w) => w.length > 0).length; // the filter drops the single empty token that split yields for empty text, so empty input counts as 0
+}
+function countHeadings(html) { // Counts opening tags of h2 through h6 in html, case-insensitively; h1 is not counted.
+  const headings = html.match(/<h[2-6][^>]*>/gi) || []; // match returns null when nothing is found, hence the empty-array fallback
+  return headings.length;
+}
+function checkContentDepth(data, topicCoherenceScore) { // Scores 0-10 the depth of the data.blogSample pages from average word count, average h2-h6 count, and the shares of deep and thin pages; topicCoherenceScore is optional and can only lower a high result. Returns a content_depth criterion result object.
+  const findings = [];
+  if (!data.blogSample || data.blogSample.length < 2) { // fewer than 2 blog pages: fixed score 3 with status partial
+    findings.push({ severity: "info", detail: `Only ${data.blogSample?.length || 0} blog pages found - insufficient for depth analysis` });
+    return { criterion: "content_depth", criterion_label: "Content Depth", score: 3, status: "partial", findings, fix_priority: "P2" };
+  }
+  const blogPages = data.blogSample;
+  const wordCounts = blogPages.map((p) => countWords(p.text));
+  const headingCounts = blogPages.map((p) => countHeadings(p.text));
+  const avgWords = wordCounts.reduce((a, b) => a + b, 0) / wordCounts.length;
+  const avgHeadings = headingCounts.reduce((a, b) => a + b, 0) / headingCounts.length;
+  const deepPages = wordCounts.filter((w) => w >= 1e3).length; // deep = 1000 or more words
+  const thinPages = wordCounts.filter((w) => w < 300).length; // thin = fewer than 300 words
+  const deepRatio = deepPages / blogPages.length;
+  const thinRatio = thinPages / blogPages.length;
+  let score = 0;
+  if (avgWords >= 2e3) { // 1 to 5 points from average words per page
+    score += 5;
+    findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - excellent depth` });
+  } else if (avgWords >= 1200) {
+    score += 4;
+    findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page across ${blogPages.length} pages - good depth` });
+  } else if (avgWords >= 800) {
+    score += 3;
+    findings.push({ severity: "info", detail: `Average ${Math.round(avgWords)} words per page - moderate depth` });
+  } else if (avgWords >= 400) {
+    score += 2;
+    findings.push({ severity: "low", detail: `Average ${Math.round(avgWords)} words per page - shallow content`, fix: "Expand articles with more detail, examples, and expert analysis to build AI citation authority" });
+  } else {
+    score += 1;
+    findings.push({ severity: "medium", detail: `Average ${Math.round(avgWords)} words per page - very thin content`, fix: "Content is too thin for AI engines to cite. Aim for 1000+ words per article with structured sections." });
+  }
+  if (avgHeadings >= 8) { // 0 to 3 points from average subheadings per page
+    score += 3;
+    findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - well-structured` });
+  } else if (avgHeadings >= 5) {
+    score += 2;
+    findings.push({ severity: "info", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - decent structure` });
+  } else if (avgHeadings >= 2) {
+    score += 1;
+    findings.push({ severity: "low", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page`, fix: "Add more H2/H3 headings to break content into extractable sections" });
+  } else {
+    findings.push({ severity: "medium", detail: `Average ${avgHeadings.toFixed(1)} subheadings per page - minimal structure`, fix: "Add question-format H2/H3 headings so AI engines can extract specific answers" });
+  }
+  if (deepRatio >= 0.5) { // 0 to 2 points from the share of deep pages; below 25% adds nothing and records no finding
+    score += 2;
+    findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages (${Math.round(deepRatio * 100)}%) have 1000+ words` });
+  } else if (deepRatio >= 0.25) {
+    score += 1;
+    findings.push({ severity: "info", detail: `${deepPages}/${blogPages.length} pages have 1000+ words` });
+  }
+  if (thinRatio >= 0.5) { // thin-page penalty: minus 2 at 50% or more thin pages, minus 1 at 25% or more, floored at 0
+    score = Math.max(0, score - 2);
+    findings.push({ severity: "medium", detail: `${thinPages}/${blogPages.length} pages (${Math.round(thinRatio * 100)}%) have under 300 words - high thin content ratio`, fix: "Remove or expand thin pages. Thin content dilutes site quality for AI engines." });
+  } else if (thinRatio >= 0.25) {
+    score = Math.max(0, score - 1);
+    findings.push({ severity: "low", detail: `${thinPages}/${blogPages.length} pages have under 300 words` });
+  }
+  let finalScore = Math.min(10, score);
+  if (topicCoherenceScore !== void 0 && topicCoherenceScore <= 4 && finalScore >= 8) { // when a coherence score of 4 or lower is supplied, a depth score of 8 or more is lowered to 7
+    finalScore = 7;
+    findings.push({ severity: "low", detail: "Deep content but low topic coherence - depth on scattered topics has reduced AI citation value", fix: "Focus content depth on your core expertise area for maximum AI visibility" });
+  }
+  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
+}
 function auditSiteFromData(data) {
+  const topicCoherence = checkTopicCoherence(data);
+  const cannibalization = checkContentCannibalization(data, topicCoherence.score);
   return [
     checkLlmsTxt(data),
     checkSchemaMarkup(data),
@@ -1793,52 +2060,84 @@ function auditSiteFromData(data) {
     checkSchemaCoverage(data),
     checkSpeakableSchema(data),
     checkQueryAnswerAlignment(data),
-    checkContentCannibalization(data),
-    checkVisibleDateSignal(data)
+    cannibalization,
+    checkVisibleDateSignal(data),
+    topicCoherence,
+    checkContentDepth(data, topicCoherence.score)
   ];
 }
 // src/scoring.ts
 var WEIGHTS = {
-  // Original 10
-  llms_txt: 0.1,
-  schema_markup: 0.15,
-  qa_content_format: 0.15,
-  clean_html: 0.1,
-  entity_consistency: 0.1,
-  robots_txt: 0.05,
-  faq_section: 0.1,
+  // ─── Content Substance (~55%) ─────────────────────────────────────────────
+  // WHY an AI engine would cite you. These drive citation quality directly.
+  topic_coherence: 0.14,
+  // Topical authority - THE gating signal
   original_data: 0.1,
-  internal_linking: 0.1,
-  semantic_html: 0.05,
-  // New 12
-  content_freshness: 0.07,
-  sitemap_completeness: 0.05,
-  rss_feed: 0.03,
-  table_list_extractability: 0.07,
-  definition_patterns: 0.04,
-  direct_answer_density: 0.07,
-  content_licensing: 0.04,
-  author_schema_depth: 0.04,
-  fact_density: 0.05,
-  canonical_url: 0.04,
-  content_velocity: 0.03,
-  schema_coverage: 0.03,
-  speakable_schema: 0.03,
-  query_answer_alignment: 0.08,
-  content_cannibalization: 0.05,
-  visible_date_signal: 0.04
+  // Unique value AI can't find elsewhere
+  content_depth: 0.07,
+  // Comprehensive vs thin coverage
+  fact_density: 0.06,
+  // Information density per page
+  direct_answer_density: 0.05,
+  // Direct answers to queries
+  qa_content_format: 0.05,
+  // Answer-shaped content structure
+  query_answer_alignment: 0.05,
+  // Relevance to actual AI queries
+  faq_section: 0.04,
+  // Structured Q&A pairs
+  // ─── Content Organization (~30%) ──────────────────────────────────────────
+  // HOW easily AI engines can extract and trust your content.
+  entity_consistency: 0.05,
+  // Brand authority and E-E-A-T
+  internal_linking: 0.04,
+  // Site structure and topic clusters
+  content_freshness: 0.04,
+  // Recency signals
+  schema_markup: 0.03,
+  // Structured data for discovery
+  author_schema_depth: 0.03,
+  // Expert attribution
+  table_list_extractability: 0.03,
+  // Extractable structured data
+  definition_patterns: 0.02,
+  // Clear definitions
+  visible_date_signal: 0.02,
+  // Publication date trust
+  semantic_html: 0.02,
+  // Clean semantic structure
+  clean_html: 0.02,
+  // Parseable markup
+  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
+  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
+  content_cannibalization: 0.02,
+  llms_txt: 0.02,
+  robots_txt: 0.02,
+  content_velocity: 0.02,
+  content_licensing: 0.02,
+  sitemap_completeness: 0.01,
+  canonical_url: 0.01,
+  rss_feed: 0.01,
+  schema_coverage: 0.01,
+  speakable_schema: 0.01
 };
 function calculateOverallScore(criteria) {
   let totalWeight = 0;
   let weightedSum = 0;
   for (const c of criteria) {
-    const weight = WEIGHTS[c.criterion] ?? 0.1;
+    const weight = WEIGHTS[c.criterion] ?? 0.05;
     weightedSum += c.score / 10 * weight * 100;
     totalWeight += weight;
   }
   if (totalWeight === 0) return 0;
-  return Math.round(weightedSum / totalWeight);
+  let score = Math.round(weightedSum / totalWeight);
+  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
+  if (coherence && coherence.score < 6) {
+    const cap2 = 35 + coherence.score * 5;
+    score = Math.min(score, cap2);
+  }
+  return score;
 }
 // src/scorecard-builder.ts
@@ -1868,7 +2167,9 @@ var CRITERION_LABELS = {
   "Speakable Schema": "Speakable Schema",
   "Query-Answer Alignment": "Query-Answer Alignment",
   "Content Cannibalization": "Content Cannibalization",
-  "Visible Date Signal": "Visible Date Signal"
+  "Visible Date Signal": "Visible Date Signal",
+  "Topic Coherence": "Topic Coherence",
+  "Content Depth": "Content Depth"
 };
 function scoreToStatus(score) {
   if (score === 0) return "MISSING";
@@ -1954,32 +2255,37 @@ function buildDetailedFindings(results) {
 // src/narrative-generator.ts
 var CRITERION_WEIGHTS = {
-  llms_txt: 0.1,
-  schema_markup: 0.15,
-  qa_content_format: 0.15,
-  clean_html: 0.1,
-  entity_consistency: 0.1,
-  robots_txt: 0.05,
-  faq_section: 0.1,
+  // Content Substance (~55%)
+  topic_coherence: 0.14,
   original_data: 0.1,
-  internal_linking: 0.1,
-  semantic_html: 0.05,
-  content_freshness: 0.07,
-  sitemap_completeness: 0.05,
-  rss_feed: 0.03,
-  table_list_extractability: 0.07,
-  definition_patterns: 0.04,
-  direct_answer_density: 0.07,
-  content_licensing: 0.04,
-  author_schema_depth: 0.04,
-  fact_density: 0.05,
-  canonical_url: 0.04,
-  content_velocity: 0.03,
-  schema_coverage: 0.03,
-  speakable_schema: 0.03,
-  query_answer_alignment: 0.08,
-  content_cannibalization: 0.05,
-  visible_date_signal: 0.04
+  content_depth: 0.07,
+  fact_density: 0.06,
+  direct_answer_density: 0.05,
+  qa_content_format: 0.05,
+  query_answer_alignment: 0.05,
+  faq_section: 0.04,
+  // Content Organization (~30%)
+  entity_consistency: 0.05,
+  internal_linking: 0.04,
+  content_freshness: 0.04,
+  schema_markup: 0.03,
+  author_schema_depth: 0.03,
+  table_list_extractability: 0.03,
+  definition_patterns: 0.02,
+  visible_date_signal: 0.02,
+  semantic_html: 0.02,
+  clean_html: 0.02,
+  // Technical Plumbing (~15%)
+  content_cannibalization: 0.02,
+  llms_txt: 0.02,
+  robots_txt: 0.02,
+  content_velocity: 0.02,
+  content_licensing: 0.02,
+  sitemap_completeness: 0.01,
+  canonical_url: 0.01,
+  rss_feed: 0.01,
+  schema_coverage: 0.01,
+  speakable_schema: 0.01
 };
 var OPPORTUNITY_TEMPLATES = {
   llms_txt: {
@@ -2111,6 +2417,16 @@ var OPPORTUNITY_TEMPLATES = {
     name: "Add Visible Date Signals",
     effort: "Low",
     description: "Display publication/modification dates visibly using <time> elements and add datePublished/dateModified to JSON-LD schema."
+  },
+  topic_coherence: {
+    name: "Focus Content on Core Topics",
+    effort: "High",
+    description: 'Ensure blog content consistently covers your core expertise areas rather than scattering across unrelated topics. AI engines build authority models - a site about "Medicare coverage" that also publishes about humidifiers and groceries dilutes its topical authority.'
+  },
+  content_depth: {
+    name: "Increase Content Depth",
+    effort: "Medium",
+    description: "Expand articles to 1000+ words with structured H2/H3 sections, comparison tables, and expert analysis. Thin content (under 300 words) is rarely cited by AI engines. Deep, well-structured articles demonstrate expertise."
   }
 };
 function calculateImpact(score, weight, effort) {
@@ -2232,7 +2548,7 @@ function generatePitchNumbers(score, rawData, scorecard) {
   const passing = scorecard.filter((s) => s.score >= 7).length;
   metrics.push({
     metric: "Criteria Passing",
-    value: `${passing}/26`,
+    value: `${passing}/28`,
     significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${26 - passing} criteria need attention for full AI visibility`
   });
   return metrics;
@@ -2424,20 +2740,23 @@ async function fetchMultiPageData(siteData, options) {
 // src/page-scorer.ts
 var PAGE_CRITERIA = {
-  schema_markup: { weight: 0.15, label: "Schema.org Structured Data" },
-  qa_content_format: { weight: 0.15, label: "Q&A Content Format" },
-  clean_html: { weight: 0.1, label: "Clean, Crawlable HTML" },
-  faq_section: { weight: 0.1, label: "FAQ Section Content" },
+  // Content Substance
   original_data: { weight: 0.1, label: "Original Data & Expert Content" },
-  query_answer_alignment: { weight: 0.08, label: "Query-Answer Alignment" },
-  content_freshness: { weight: 0.07, label: "Content Freshness Signals" },
-  table_list_extractability: { weight: 0.07, label: "Table & List Extractability" },
-  direct_answer_density: { weight: 0.07, label: "Direct Answer Paragraphs" },
-  semantic_html: { weight: 0.05, label: "Semantic HTML5 & Accessibility" },
-  fact_density: { weight: 0.05, label: "Fact & Data Density" },
-  definition_patterns: { weight: 0.04, label: "Definition Patterns" },
-  canonical_url: { weight: 0.04, label: "Canonical URL Strategy" },
-  visible_date_signal: { weight: 0.04, label: "Visible Date Signal" }
+  fact_density: { weight: 0.06, label: "Fact & Data Density" },
+  direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
+  qa_content_format: { weight: 0.05, label: "Q&A Content Format" },
+  query_answer_alignment: { weight: 0.05, label: "Query-Answer Alignment" },
+  faq_section: { weight: 0.04, label: "FAQ Section Content" },
+  // Content Organization
+  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
+  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
+  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
+  definition_patterns: { weight: 0.02, label: "Definition Patterns" },
+  visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
+  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
+  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
+  // Technical Plumbing
+  canonical_url: { weight: 0.01, label: "Canonical URL Strategy" }
 };
 function extractJsonLdBlocks(html) {
   const blocks = [];
@@ -2777,7 +3096,7 @@ function extractTitle(html) {
 function getTextContent2(html) {
   return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
 }
-function countWords(text) {
+function countWords2(text) {
   if (!text) return 0;
   return text.split(/\s+/).filter((w) => w.length > 0).length;
 }
@@ -2928,7 +3247,7 @@ function checkHasQuestionHeadings(html) {
 function analyzePage(html, url, category) {
   const title = extractTitle(html);
   const textContent = getTextContent2(html);
-  const wordCount = countWords(textContent);
+  const wordCount = countWords2(textContent);
   const issues = [];
   const strengths = [];
   const issueChecks = [
@@ -2996,7 +3315,7 @@ function extractTitle2(html) {
 function getTextContent3(html) {
   return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
 }
-function countWords2(text) {
+function countWords3(text) {
   if (!text) return 0;
   return text.split(/\s+/).filter((w) => w.length > 0).length;
 }
@@ -3144,7 +3463,7 @@ function buildLinkGraph(pages, domain, homepageUrl) {
     if (nodes.has(norm)) continue;
     const title = extractTitle2(page.text);
     const text = getTextContent3(page.text);
-    const wordCount = countWords2(text);
+    const wordCount = countWords3(text);
     nodes.set(norm, {
       url: norm,
       title,
@@ -3207,32 +3526,37 @@ function buildLinkGraph(pages, domain, homepageUrl) {
 // src/fix-engine.ts
 var CRITERION_WEIGHTS2 = {
-  llms_txt: 0.1,
-  schema_markup: 0.15,
-  qa_content_format: 0.15,
-  clean_html: 0.1,
-  entity_consistency: 0.1,
-  robots_txt: 0.05,
-  faq_section: 0.1,
+  // Content Substance (~55%)
+  topic_coherence: 0.14,
   original_data: 0.1,
-  internal_linking: 0.1,
-  semantic_html: 0.05,
-  content_freshness: 0.07,
-  sitemap_completeness: 0.05,
-  rss_feed: 0.03,
-  table_list_extractability: 0.07,
-  definition_patterns: 0.04,
-  direct_answer_density: 0.07,
-  content_licensing: 0.04,
-  author_schema_depth: 0.04,
-  fact_density: 0.05,
-  canonical_url: 0.04,
-  content_velocity: 0.03,
-  schema_coverage: 0.03,
-  speakable_schema: 0.03,
-  query_answer_alignment: 0.08,
-  content_cannibalization: 0.05,
-  visible_date_signal: 0.04
+  content_depth: 0.07,
+  fact_density: 0.06,
+  direct_answer_density: 0.05,
+  qa_content_format: 0.05,
+  query_answer_alignment: 0.05,
+  faq_section: 0.04,
+  // Content Organization (~30%)
+  entity_consistency: 0.05,
+  internal_linking: 0.04,
+  content_freshness: 0.04,
+  schema_markup: 0.03,
+  author_schema_depth: 0.03,
+  table_list_extractability: 0.03,
+  definition_patterns: 0.02,
+  visible_date_signal: 0.02,
+  semantic_html: 0.02,
+  clean_html: 0.02,
+  // Technical Plumbing (~15%)
+  content_cannibalization: 0.02,
+  llms_txt: 0.02,
+  robots_txt: 0.02,
+  content_velocity: 0.02,
+  content_licensing: 0.02,
+  sitemap_completeness: 0.01,
+  canonical_url: 0.01,
+  rss_feed: 0.01,
+  schema_coverage: 0.01,
+  speakable_schema: 0.01
 };
 var PHASE_CONFIG = [
   {
@@ -3255,7 +3579,9 @@ var PHASE_CONFIG = [
       "content_freshness",
       "table_list_extractability",
       "query_answer_alignment",
-      "visible_date_signal"
+      "visible_date_signal",
+      "topic_coherence",
+      "content_depth"
     ]
   },
   {
@@ -4159,6 +4485,55 @@ Summarization: yes`,
       affectedPages: affected,
       pageCount: affected?.length
     }];
+  },
+  topic_coherence: (c) => {
+    if (c.score >= 10) return [];
+    const impact = impactFromScore(c.score);
+    const effort = effortForCriterion("topic_coherence", c.score);
+    return [{
+      id: "fix-topic-coherence",
+      criterion: c.criterion_label,
+      criterionId: c.criterion,
+      title: "Focus blog content on core expertise",
+      description: "Ensure blog content consistently covers your core topic areas. Scattered content across unrelated topics weakens AI engine authority signals.",
+      impact,
+      effort: effort === "trivial" ? "low" : effort,
+      impactScore: 0,
+      category: "content",
+      steps: [
+        "Identify 2-3 core expertise areas your brand is known for",
+        "Audit existing blog posts and remove or consolidate off-topic content",
+        "Create a content calendar focused on core topics",
+        "Use topic clusters: pillar pages linking to supporting articles within the same niche"
+      ],
+      successCriteria: "80%+ of blog content covers core expertise areas with consistent topic focus"
+    }];
+  },
+  content_depth: (c, pages) => {
+    if (c.score >= 10) return [];
+    const impact = impactFromScore(c.score);
+    const effort = effortForCriterion("content_depth", c.score);
+    const affected = getAffectedPages("content_depth", pages);
+    return [{
+      id: "fix-content-depth",
+      criterion: c.criterion_label,
+      criterionId: c.criterion,
+      title: "Increase content depth and structure",
+      description: "Expand thin content with more detail, examples, and structured sections. AI engines prefer comprehensive articles with clear heading hierarchies.",
+      impact,
+      effort: effort === "trivial" ? "low" : effort,
+      impactScore: 0,
+      category: "content",
+      steps: [
+        "Aim for 1000+ words per article with expert analysis and examples",
+        "Use H2/H3 subheadings every 200-300 words for clear structure",
+        "Add comparison tables, numbered steps, and data points",
+        "Remove or expand thin pages (under 300 words) that dilute site quality"
+      ],
+      successCriteria: "Average article length exceeds 1000 words with 5+ subheadings per page",
+      affectedPages: affected,
+      pageCount: affected?.length
+    }];
   }
 };
 function generateFixPlan(domain, overallScore, criteria, pagesReviewed, linkGraph) {
@@ -4400,7 +4775,7 @@ async function audit(domain, options) {
     }
   }
   if (options?.fullCrawl) {
-    const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-VFARFR2C.js");
+    const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-UIOMKOZA.js");
     const crawlResult = await crawlFullSite2(siteData, {
       maxPages: options.maxPages ?? 200,
       concurrency: options.concurrency ?? 5