aeorank 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -326,21 +326,28 @@ __export(index_exports, {
326
326
  audit: () => audit,
327
327
  auditSiteFromData: () => auditSiteFromData,
328
328
  buildDetailedFindings: () => buildDetailedFindings,
329
+ buildLinkGraph: () => buildLinkGraph,
329
330
  buildScorecard: () => buildScorecard,
331
+ calculateDepths: () => calculateDepths,
330
332
  calculateOverallScore: () => calculateOverallScore,
331
333
  classifyRendering: () => classifyRendering,
332
334
  compare: () => compare,
333
335
  crawlFullSite: () => crawlFullSite,
336
+ detectClusters: () => detectClusters,
337
+ detectHubs: () => detectHubs,
334
338
  detectParkedDomain: () => detectParkedDomain,
339
+ detectPillars: () => detectPillars,
335
340
  extractAllUrlsFromSitemap: () => extractAllUrlsFromSitemap,
336
341
  extractContentPagesFromSitemap: () => extractContentPagesFromSitemap,
337
342
  extractInternalLinks: () => extractInternalLinks,
343
+ extractLinksWithAnchors: () => extractLinksWithAnchors,
338
344
  extractNavLinks: () => extractNavLinks,
339
345
  extractRawDataSummary: () => extractRawDataSummary,
340
346
  fetchMultiPageData: () => fetchMultiPageData,
341
347
  fetchWithHeadless: () => fetchWithHeadless,
342
348
  generateBottomLine: () => generateBottomLine,
343
349
  generateComparisonHtmlReport: () => generateComparisonHtmlReport,
350
+ generateFixPlan: () => generateFixPlan,
344
351
  generateHtmlReport: () => generateHtmlReport,
345
352
  generateOpportunities: () => generateOpportunities,
346
353
  generatePitchNumbers: () => generatePitchNumbers,
@@ -348,7 +355,10 @@ __export(index_exports, {
348
355
  inferCategory: () => inferCategory,
349
356
  isSpaShell: () => isSpaShell,
350
357
  prefetchSiteData: () => prefetchSiteData,
351
- scoreToStatus: () => scoreToStatus
358
+ scoreAllPages: () => scoreAllPages,
359
+ scorePage: () => scorePage,
360
+ scoreToStatus: () => scoreToStatus,
361
+ serializeLinkGraph: () => serializeLinkGraph
352
362
  });
353
363
  module.exports = __toCommonJS(index_exports);
354
364
 
@@ -2870,12 +2880,359 @@ async function fetchMultiPageData(siteData, options) {
2870
2880
  return added;
2871
2881
  }
2872
2882
 
2883
+ // src/page-scorer.ts
2884
// Per-page scoring criteria, keyed by criterion id.
//   weight - relative contribution of the criterion to the page score
//   label  - human-readable criterion name copied into each result row
// The weights add up to 1.11, not 1. scorePage divides the weighted sum by the
// summed weight, so only the ratios between weights matter.
var PAGE_CRITERIA = {
  schema_markup: { weight: 0.15, label: "Schema.org Structured Data" },
  qa_content_format: { weight: 0.15, label: "Q&A Content Format" },
  clean_html: { weight: 0.1, label: "Clean, Crawlable HTML" },
  faq_section: { weight: 0.1, label: "FAQ Section Content" },
  original_data: { weight: 0.1, label: "Original Data & Expert Content" },
  query_answer_alignment: { weight: 0.08, label: "Query-Answer Alignment" },
  content_freshness: { weight: 0.07, label: "Content Freshness Signals" },
  table_list_extractability: { weight: 0.07, label: "Table & List Extractability" },
  direct_answer_density: { weight: 0.07, label: "Direct Answer Paragraphs" },
  semantic_html: { weight: 0.05, label: "Semantic HTML5 & Accessibility" },
  fact_density: { weight: 0.05, label: "Fact & Data Density" },
  definition_patterns: { weight: 0.04, label: "Definition Patterns" },
  canonical_url: { weight: 0.04, label: "Canonical URL Strategy" },
  visible_date_signal: { weight: 0.04, label: "Visible Date Signal" }
};
2900
/**
 * Collects the raw text inside every `<script type="application/ld+json">` element.
 *
 * @param {string} html - Page markup to scan.
 * @returns {string[]} Script bodies in document order, neither parsed nor trimmed.
 */
function extractJsonLdBlocks(html) {
  const jsonLdScript = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
  return Array.from(html.matchAll(jsonLdScript), (found) => found[1]);
}
2909
/**
 * Gathers the distinct string-valued `"@type"` entries found in JSON-LD text.
 * The blocks are scanned textually (no JSON parsing), so nested types are
 * included and array-valued `@type` entries are not.
 *
 * @param {string[]} blocks - Raw JSON-LD script bodies.
 * @returns {Set<string>} Type names in first-seen order.
 */
function extractTypesFromJsonLd(blocks) {
  const typePattern = /"@type"\s*:\s*"([^"]+)"/g;
  const found = new Set();
  for (const block of blocks) {
    for (const [, typeName] of block.matchAll(typePattern)) {
      found.add(typeName);
    }
  }
  return found;
}
2920
/**
 * Reduces markup to its visible text: script and style elements are removed,
 * remaining tags become spaces, and whitespace runs collapse to one space.
 *
 * @param {string} html - Page markup.
 * @returns {string} Trimmed plain text.
 */
function getTextContent(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
  const withoutTags = withoutStyles.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
2923
/**
 * Returns the text of every h2/h3 heading that reads like a question: it either
 * ends with "?" or opens with an interrogative/auxiliary word (what, how, is, ...).
 *
 * @param {string} html - Page markup.
 * @returns {string[]} Tag-stripped, trimmed heading texts in document order.
 */
function extractQuestionHeadings2(html) {
  const questionOpener = /^(what|how|why|when|where|who|which|can|do|does|is|are|should|will)\b/i;
  const headingTags = html.match(/<h[2-3][^>]*>([\s\S]*?)<\/h[2-3]>/gi) || [];
  return headingTags
    .map((tag) => tag.replace(/<[^>]*>/g, "").trim())
    .filter((text) => text.endsWith("?") || questionOpener.test(text));
}
2934
/**
 * Counts question-style h2/h3 headings and how many of them are answered, i.e.
 * immediately followed (whitespace aside) by a paragraph of at least 20
 * characters of text.
 *
 * Each heading is paired with the markup that directly follows that same
 * heading. The previous implementation searched the raw HTML for the stripped
 * heading text and lazily skipped ahead to any later `</h2>`/`</h3>`, which
 * (a) credited an unanswered question with a paragraph belonging to a later
 * heading and (b) never matched headings that contain inline markup.
 *
 * The question test is kept local so the block is self-contained; it must stay
 * in sync with extractQuestionHeadings2.
 *
 * @param {string} html - Page markup.
 * @returns {{ total: number, answered: number }} Question headings found and
 *   how many have a direct answer paragraph.
 */
function countAnsweredQuestions(html) {
  const headingPattern = /<h[2-3][^>]*>([\s\S]*?)<\/h[2-3]>/gi;
  const questionOpener = /^(what|how|why|when|where|who|which|can|do|does|is|are|should|will)\b/i;
  // Sticky: must match exactly where the heading ends, never further down the page.
  const followingParagraph = /\s*<p[^>]*>([\s\S]*?)<\/p>/iy;
  let total = 0;
  let answered = 0;
  for (const heading of html.matchAll(headingPattern)) {
    const question = heading[0].replace(/<[^>]*>/g, "").trim();
    if (!question.endsWith("?") && !questionOpener.test(question)) continue;
    total++;
    followingParagraph.lastIndex = heading.index + heading[0].length;
    const paragraph = followingParagraph.exec(html);
    if (paragraph && paragraph[1].replace(/<[^>]*>/g, "").trim().length >= 20) {
      answered++;
    }
  }
  return { total, answered };
}
2948
/**
 * Clamps a value from above.
 *
 * @param {number} value - Value to limit.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of the two arguments.
 */
function cap(value, max) {
  const bounded = Math.min(max, value);
  return bounded;
}
2951
/**
 * Scores JSON-LD structured data on a 0-10 scale.
 * 0 when no JSON-LD block exists; otherwise 3 base points, +2 per recognised
 * schema type (at most +4), +2 for Organization/LocalBusiness, +1 for FAQPage.
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreSchemaMarkup(html) {
  const blocks = extractJsonLdBlocks(html);
  if (blocks.length === 0) return 0;
  const recognised = new Set([
    "Organization",
    "LocalBusiness",
    "Article",
    "FAQPage",
    "Product",
    "WebPage",
    "BreadcrumbList",
    "HowTo",
    "Person",
    "WebSite",
    "BlogPosting",
    "Service"
  ]);
  const types = extractTypesFromJsonLd(blocks);
  const knownCount = [...types].filter((typeName) => recognised.has(typeName)).length;
  const entityBonus = types.has("Organization") || types.has("LocalBusiness") ? 2 : 0;
  const faqBonus = types.has("FAQPage") ? 1 : 0;
  return cap(3 + cap(knownCount * 2, 4) + entityBonus + faqBonus, 10);
}
2979
/**
 * Scores question-and-answer formatting on a 0-10 scale: up to 5 points for the
 * number of question headings, 3 when at least one question is directly
 * answered, and 2 when the page has exactly one h1.
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreQAFormat(html) {
  const questionCount = extractQuestionHeadings2(html).length;
  let points = 0;
  if (questionCount >= 10) {
    points = 5;
  } else if (questionCount >= 3) {
    points = 3;
  } else if (questionCount >= 1) {
    points = 1;
  }
  const { answered } = countAnsweredQuestions(html);
  if (answered >= 1) points += 3;
  const h1Count = (html.match(/<h1[\s>]/gi) || []).length;
  if (h1Count === 1) points += 2;
  return cap(points, 10);
}
2991
/**
 * Scores basic crawlability on a 0-10 scale: 1 point per landmark element
 * present (main/article/section), 2 for exactly one h1, 3 for more than 500
 * characters of visible text, and 2 when both a title and a meta description exist.
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreCleanHtml(html) {
  const lowered = html.toLowerCase();
  const landmarkCount = ["<main", "<article", "<section"].filter((tag) => lowered.includes(tag)).length;
  let points = cap(landmarkCount, 3);
  const h1Count = (html.match(/<h1[\s>]/gi) || []).length;
  if (h1Count === 1) points += 2;
  if (getTextContent(html).length > 500) points += 3;
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
  // The description meta tag may list `name` before `content` or the reverse.
  const descNameFirst = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i;
  const descContentFirst = /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i;
  const hasDesc = descNameFirst.test(html) || descContentFirst.test(html);
  if (hasTitle && hasDesc) points += 2;
  return cap(points, 10);
}
3008
/**
 * Scores FAQ presence: 2 points for an "FAQ"/"frequently asked" mention, 3 for
 * FAQPage JSON-LD, 1 for ten or more question headings, and 1 for a
 * details element or accordion/collapsible/toggle markup.
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10 (the listed signals total at most 7).
 */
function scoreFaqSection(html) {
  const lowerHtml = html.toLowerCase();
  const types = extractTypesFromJsonLd(extractJsonLdBlocks(html));
  const signals = [
    [/frequently\s*asked|faq/i.test(html), 2],
    [types.has("FAQPage"), 3],
    [extractQuestionHeadings2(html).length >= 10, 1],
    [/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml), 1]
  ];
  const total = signals.reduce((sum, [present, points]) => (present ? sum + points : sum), 0);
  return cap(total, 10);
}
3020
/**
 * Scores signs of original data and expertise on a 0-10 scale:
 * first-party research wording (3) or, failing that, any figure (1);
 * a case study with metrics (3) or without (1); expert/attribution wording (2);
 * and a link whose href contains "/blog" (2).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreOriginalData(html) {
  const text = getTextContent(html);
  const hasOwnResearch = /\b(our (study|analysis|research|survey|data|findings))\b/i.test(text);
  const hasFigures = /\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text);
  const hasCaseStudy = /\bcase\s+stud(y|ies)\b/i.test(text);
  const hasMetrics = /\d+(\.\d+)?%|\$[\d,.]+/.test(text);
  let points = 0;
  if (hasOwnResearch) {
    points += 3;
  } else if (hasFigures) {
    points += 1;
  }
  if (hasCaseStudy) {
    points += hasMetrics ? 3 : 1;
  }
  if (/\baccording\s+to\b|\bexpert|\b(Ph\.?D|MD|professor|analyst|researcher)\b/i.test(text)) {
    points += 2;
  }
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
    points += 2;
  }
  return cap(points, 10);
}
3041
/**
 * Scores how many question headings are directly answered.
 * Pages without question headings get a neutral 5; otherwise 10 for an answered
 * ratio of at least 0.8, 7 for at least 0.5, 4 when any question is answered, else 0.
 *
 * @param {string} html - Page markup.
 * @returns {number} One of 0, 4, 5, 7 or 10.
 */
function scoreQueryAnswerAlignment(html) {
  const { total, answered } = countAnsweredQuestions(html);
  if (total === 0) return 5;
  const ratio = answered / total;
  return ratio >= 0.8 ? 10 : ratio >= 0.5 ? 7 : answered > 0 ? 4 : 0;
}
3050
/**
 * Scores freshness signals: JSON-LD datePublished/dateModified (3), time
 * elements (3 for two or more, 1 for one), article:published_time /
 * article:modified_time meta tags (2), and any mention of the current or
 * previous calendar year anywhere in the markup (2).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreContentFreshness(html) {
  const jsonLd = extractJsonLdBlocks(html).join(" ");
  let points = /datePublished|dateModified/i.test(jsonLd) ? 3 : 0;
  const timeCount = (html.match(/<time[\s>]/gi) || []).length;
  if (timeCount >= 2) {
    points += 3;
  } else if (timeCount === 1) {
    points += 1;
  }
  if (/<meta\s[^>]*property=["']article:(published_time|modified_time)["']/i.test(html)) points += 2;
  const thisYear = new Date().getFullYear();
  const recentYear = new RegExp(`\\b(${thisYear}|${thisYear - 1})\\b`);
  if (recentYear.test(html)) points += 2;
  return cap(points, 10);
}
3064
/**
 * Scores extractable tabular/list structure: tables with header cells (4 for
 * two or more, 3 for one, 1 when only header-less tables exist), ordered lists
 * (2), unordered lists (2), ten or more list items (1), description lists (1).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreTableListExtractability(html) {
  const headedTables = (html.match(/<table[\s\S]*?<th[\s>]/gi) || []).length;
  let points = 0;
  if (headedTables >= 2) {
    points = 4;
  } else if (headedTables === 1) {
    points = 3;
  } else if (/<table[\s>]/i.test(html)) {
    points = 1;
  }
  if (/<ol[\s>]/i.test(html)) points += 2;
  if (/<ul[\s>]/i.test(html)) points += 2;
  const itemCount = (html.match(/<li[\s>]/gi) || []).length;
  if (itemCount >= 10) points += 1;
  if (/<dl[\s>]/i.test(html)) points += 1;
  return cap(points, 10);
}
3077
/**
 * Scores direct-answer content: answered question headings (6 for three or
 * more, 3 for at least one), snippet-sized paragraphs of 40-150 words (2 for
 * three or more, 1 for at least one), and two or more direct-opener phrases
 * such as "yes", "in short" or "the answer is" (2).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreDirectAnswerDensity(html) {
  const { answered } = countAnsweredQuestions(html);
  let points = answered >= 3 ? 6 : answered >= 1 ? 3 : 0;
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
  const snippetCount = paragraphs.filter((paragraph) => {
    const plain = paragraph.replace(/<[^>]*>/g, "").trim();
    const wordCount = plain.split(/\s+/).filter((word) => word.length > 0).length;
    return wordCount >= 40 && wordCount <= 150;
  }).length;
  if (snippetCount >= 3) {
    points += 2;
  } else if (snippetCount >= 1) {
    points += 1;
  }
  const openers = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
  if (openers.length >= 2) points += 2;
  return cap(points, 10);
}
3095
/**
 * Scores semantic markup and accessibility hints: up to 4 points for semantic
 * elements present (floor of 0.7 per distinct element), 2 when at least 80% of
 * images carry a quoted alt attribute, 2 for a lang attribute on the html
 * element, and 2 for any aria- attribute.
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreSemanticHtml(html) {
  const lowered = html.toLowerCase();
  const semanticTags = ["<main", "<article", "<time", "<nav", "<header", "<footer"];
  const present = semanticTags.filter((tag) => lowered.includes(tag)).length;
  let points = cap(Math.floor(present * 0.7), 4);
  const images = html.match(/<img\s[^>]*>/gi) || [];
  if (images.length > 0) {
    const withAlt = images.filter((img) => /\salt=["'][^"']*["']/i.test(img)).length;
    if (withAlt / images.length >= 0.8) points += 2;
  }
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) points += 2;
  if (/\baria-/i.test(html)) points += 2;
  return cap(points, 10);
}
3116
/**
 * Scores density of concrete facts in the visible text: numeric figures (5 for
 * six or more, 3 for three or more, 1 for at least one), distinct four-digit
 * years (2 for two or more, 1 for one), attribution phrases (2), and two or
 * more quantities with units (1).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreFactDensity(html) {
  const text = getTextContent(html);
  const figures = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
  let points = 0;
  if (figures.length >= 6) {
    points = 5;
  } else if (figures.length >= 3) {
    points = 3;
  } else if (figures.length >= 1) {
    points = 1;
  }
  const distinctYears = new Set(text.match(/\b(19|20)\d{2}\b/g) || []);
  if (distinctYears.size >= 2) {
    points += 2;
  } else if (distinctYears.size === 1) {
    points += 1;
  }
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) points += 2;
  const measured = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
  if (measured.length >= 2) points += 1;
  return cap(points, 10);
}
3133
/**
 * Scores definitional writing: definition phrases in the visible text (5 for
 * three or more, 3 for at least one), a definition phrase within the first
 * 2000 characters (2), dfn/abbr elements (1), and a description list or the
 * word "glossary" anywhere in the markup (2).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreDefinitionPatterns(html) {
  const text = getTextContent(html);
  const phraseCount = (text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || []).length;
  let points = phraseCount >= 3 ? 5 : phraseCount >= 1 ? 3 : 0;
  const opening = text.slice(0, 2000);
  if (/\b(is a|is an|refers to|defined as)\b/i.test(opening)) points += 2;
  const hasTermMarkup = /<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html);
  if (hasTermMarkup) points += 1;
  const hasGlossary = /<dl[\s>]/i.test(html) || /glossary/i.test(html);
  if (hasGlossary) points += 2;
  return cap(points, 10);
}
3145
/**
 * Scores the canonical link: 0 when absent; otherwise 4 base points, +3 when
 * the canonical resolves to the same hostname and pathname as the page URL,
 * +2 when the href literally starts with "https://", +1 when exactly one
 * canonical link is present.
 *
 * @param {string} html - Page markup.
 * @param {string} [url] - Page URL; the self-reference check is skipped when missing.
 * @returns {number} Score capped at 10.
 */
function scoreCanonicalUrl(html, url) {
  // The link tag may list `rel` before `href` or the reverse.
  const relFirst = /<link[^>]*rel=["']canonical["'][^>]*href=["']([^"']+)["']/i;
  const hrefFirst = /<link[^>]*href=["']([^"']+)["'][^>]*rel=["']canonical["']/i;
  const found = html.match(relFirst) || html.match(hrefFirst);
  if (!found) return 0;
  const [, canonicalHref] = found;
  let points = 4;
  if (url) {
    try {
      const canonical = new URL(canonicalHref, url);
      const page = new URL(url);
      const samePage = canonical.pathname === page.pathname && canonical.hostname === page.hostname;
      if (samePage) points += 3;
    } catch {
      // Unparsable URL: the self-reference bonus is simply not awarded.
    }
  }
  if (canonicalHref.startsWith("https://")) points += 2;
  const canonicalTags = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
  if (canonicalTags.length === 1) points += 1;
  return cap(points, 10);
}
3166
/**
 * Scores explicit date signals: a time element with a datetime attribute and
 * text content (5), JSON-LD datePublished/dateModified (3), article time meta
 * tags (2), and a JSON-LD dateModified no more than 180 days old (1).
 *
 * @param {string} html - Page markup.
 * @returns {number} Score capped at 10.
 */
function scoreVisibleDateSignal(html) {
  let points = 0;
  const datedTimeTags = html.match(/<time[^>]*datetime=["'][^"']+["'][^>]*>[^<]+<\/time>/gi) || [];
  if (datedTimeTags.length > 0) points += 5;
  const jsonLd = extractJsonLdBlocks(html).join(" ");
  if (/datePublished|dateModified/i.test(jsonLd)) points += 3;
  if (/<meta\s[^>]*property=["']article:(published_time|modified_time)["']/i.test(html)) points += 2;
  const modified = jsonLd.match(/"dateModified"\s*:\s*"([^"]+)"/i);
  if (modified) {
    // An unparsable date yields NaN (Date never throws on a string), and
    // NaN <= 180 is false, so bad dates earn no recency point.
    const msPerDay = 1e3 * 60 * 60 * 24;
    const ageInDays = (Date.now() - new Date(modified[1]).getTime()) / msPerDay;
    if (ageInDays <= 180) points += 1;
  }
  return cap(points, 10);
}
3185
// Maps each PAGE_CRITERIA key to the function that produces its 0-10 score.
// scorePage looks entries up by criterion id and calls them as fn(html, url);
// scoreCanonicalUrl is the only scorer that declares the url parameter.
var SCORING_FUNCTIONS = {
  schema_markup: scoreSchemaMarkup,
  qa_content_format: scoreQAFormat,
  clean_html: scoreCleanHtml,
  faq_section: scoreFaqSection,
  original_data: scoreOriginalData,
  query_answer_alignment: scoreQueryAnswerAlignment,
  content_freshness: scoreContentFreshness,
  table_list_extractability: scoreTableListExtractability,
  direct_answer_density: scoreDirectAnswerDensity,
  semantic_html: scoreSemanticHtml,
  fact_density: scoreFactDensity,
  definition_patterns: scoreDefinitionPatterns,
  canonical_url: scoreCanonicalUrl,
  visible_date_signal: scoreVisibleDateSignal
};
3201
/**
 * Runs every page-level criterion against one page and combines the results
 * into a single 0-100 score.
 *
 * Each criterion contributes (score / 10) * weight * 100; the sum is divided by
 * the total weight and rounded, so the criteria weights need not add up to 1.
 *
 * @param {string} html - Page markup.
 * @param {string} [url] - Page URL, forwarded to every scorer.
 * @returns {{ aeoScore: number, criterionScores: Array<{ criterion: string,
 *   criterion_label: string, score: number, weight: number }> }}
 */
function scorePage(html, url) {
  const criterionScores = Object.entries(PAGE_CRITERIA).map(([criterion, { weight, label }]) => {
    const scorer = SCORING_FUNCTIONS[criterion];
    return { criterion, criterion_label: label, score: scorer(html, url), weight };
  });
  let weightedSum = 0;
  let totalWeight = 0;
  for (const { score, weight } of criterionScores) {
    weightedSum += score / 10 * weight * 100;
    totalWeight += weight;
  }
  const aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
  return { aeoScore, criterionScores };
}
3215
/**
 * Scores the homepage (when present) followed by every page in the blog sample.
 *
 * @param {object} siteData - Reads `homepage.text`, `protocol`, `domain` and
 *   `blogSample[].text` / `blogSample[].finalUrl`.
 * @returns {Array} One scorePage result per scored page, homepage first.
 */
function scoreAllPages(siteData) {
  const { homepage, blogSample, protocol, domain } = siteData;
  const results = [];
  if (homepage) {
    // Without a known protocol the homepage is scored with no URL.
    const homeUrl = protocol ? `${protocol}://${domain}` : undefined;
    results.push(scorePage(homepage.text, homeUrl));
  }
  for (const page of blogSample || []) {
    results.push(scorePage(page.text, page.finalUrl || undefined));
  }
  return results;
}
3229
+
2873
3230
  // src/page-analyzer.ts
2874
3231
  function extractTitle(html) {
2875
3232
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
2876
3233
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
2877
3234
  }
2878
- function getTextContent(html) {
3235
+ function getTextContent2(html) {
2879
3236
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
2880
3237
  }
2881
3238
  function countWords(text) {
@@ -3028,7 +3385,7 @@ function checkHasQuestionHeadings(html) {
3028
3385
  }
3029
3386
  function analyzePage(html, url, category) {
3030
3387
  const title = extractTitle(html);
3031
- const textContent = getTextContent(html);
3388
+ const textContent = getTextContent2(html);
3032
3389
  const wordCount = countWords(textContent);
3033
3390
  const issues = [];
3034
3391
  const strengths = [];
@@ -3054,7 +3411,8 @@ function analyzePage(html, url, category) {
3054
3411
  for (const result of strengthChecks) {
3055
3412
  if (result) strengths.push(result);
3056
3413
  }
3057
- return { url, title, category, wordCount, issues, strengths };
3414
+ const { aeoScore, criterionScores } = scorePage(html, url);
3415
+ return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
3058
3416
  }
3059
3417
  function analyzeAllPages(siteData) {
3060
3418
  const reviews = [];
@@ -3156,6 +3514,1314 @@ async function audit(domain, options) {
3156
3514
  // src/index.ts
3157
3515
  init_full_site_crawler();
3158
3516
 
3517
+ // src/link-graph.ts
3518
/**
 * Converts a link graph into a plain object: the node Map becomes an array of
 * its values, `stats` and `clusters` are passed through by reference, and the
 * edge list is left out.
 *
 * @param {{ nodes: Map<string, object>, stats: object, clusters: Array }} graph
 * @returns {{ nodes: object[], stats: object, clusters: Array }}
 */
function serializeLinkGraph(graph) {
  const { nodes, stats, clusters } = graph;
  return { nodes: [...nodes.values()], stats, clusters };
}
3525
/**
 * Produces the lookup key used for graph nodes: origin + path without trailing
 * slashes + query string, all lower-cased. The fragment is dropped. Input that
 * does not parse as an absolute URL is returned lower-cased and otherwise untouched.
 *
 * @param {string} url - Absolute URL (or any string).
 * @returns {string} Normalized key.
 */
function normalizeUrl2(url) {
  let key = url;
  try {
    const { origin, pathname, search } = new URL(url);
    key = origin + pathname.replace(/\/+$/, "") + search;
  } catch {
    // Not an absolute URL: fall back to the raw input.
  }
  return key.toLowerCase();
}
3533
// Paths ending in one of these file extensions are treated as static resources;
// extractLinksWithAnchors tests the link's pathname and skips matches.
var RESOURCE_EXTENSIONS2 = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|woff|woff2|ttf|eot|mp4|mp3|webp|avif|zip|gz|tar|json)$/i;
// Path prefixes (API, admin, auth, cart, build-asset routes, ...) that
// extractLinksWithAnchors also skips; matched against the start of the pathname.
var SKIP_PATH_PATTERNS2 = /^\/(api|wp-admin|wp-json|static|assets|_next|auth|login|signup|cart|checkout|admin|feed|xmlrpc)\b/i;
3535
/**
 * Reads the contents of the first title element.
 *
 * @param {string} html - Page markup.
 * @returns {string} Title with whitespace runs collapsed and trimmed, or "" when
 *   no title element exists.
 */
function extractTitle2(html) {
  const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (!found) return "";
  return found[1].replace(/\s+/g, " ").trim();
}
3539
/**
 * Reduces markup to visible text by applying, in order: script removal, style
 * removal, tag-to-space replacement, whitespace collapsing, then a final trim.
 *
 * @param {string} html - Page markup.
 * @returns {string} Trimmed plain text.
 */
function getTextContent3(html) {
  const passes = [
    [/<script[\s\S]*?<\/script>/gi, ""],
    [/<style[\s\S]*?<\/style>/gi, ""],
    [/<[^>]*>/g, " "],
    [/\s+/g, " "]
  ];
  return passes.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), html).trim();
}
3542
/**
 * Counts whitespace-separated words.
 *
 * @param {string} [text] - Plain text; empty or missing input counts as 0.
 * @returns {number} Number of non-empty tokens.
 */
function countWords2(text) {
  return text ? text.split(/\s+/).filter(Boolean).length : 0;
}
3546
/**
 * Extracts the unique internal content links of one page together with their
 * anchor text.
 *
 * Changes from the previous implementation:
 *  - single-quoted href attributes are recognised as well as double-quoted ones;
 *  - hrefs are resolved with the URL parser against the source page, so
 *    document-relative links ("guide/intro", "../pricing") point at the right
 *    path and root-/protocol-relative links keep the source page's scheme
 *    instead of being forced to https://<domain>/...;
 *  - anything that does not resolve to http(s) (mailto:, tel:, javascript:,
 *    data:, ...) is dropped rather than being turned into a bogus site path.
 *
 * Links are skipped when they are fragment-only or query-only, leave the site
 * (hostnames compared without a leading "www."), resolve to the root path,
 * match RESOURCE_EXTENSIONS2 or SKIP_PATH_PATTERNS2, or point back at the
 * source page. Only the first link to each target is kept.
 *
 * @param {string} html - Markup of the source page.
 * @param {string} sourceUrl - URL of the source page; base for relative hrefs.
 * @param {string} domain - Site domain, with or without "www.".
 * @returns {Array<{ source: string, target: string, anchorText: string }>}
 *   Edges with normalized URLs, in document order.
 */
function extractLinksWithAnchors(html, sourceUrl, domain) {
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  const sourceNorm = normalizeUrl2(sourceUrl);
  // Resolve against the page itself; fall back to the site root when the page
  // URL cannot be parsed. With no usable base only absolute hrefs resolve.
  let baseUrl;
  try {
    baseUrl = new URL(sourceUrl);
  } catch {
    try {
      baseUrl = new URL(`https://${domain}`);
    } catch {
      baseUrl = undefined;
    }
  }
  const edges = [];
  const seenTargets = new Set();
  const anchorRegex = /<a\s[^>]*href=(?:"([^"]*)"|'([^']*)')[^>]*>([\s\S]*?)<\/a>/gi;
  for (const match of html.matchAll(anchorRegex)) {
    const href = (match[1] ?? match[2]).trim();
    if (!href) continue;
    // Same-page references: fragment-only or query-only.
    if (href.startsWith("#") || href.startsWith("?")) continue;
    let parsed;
    try {
      parsed = new URL(href, baseUrl);
    } catch {
      continue;
    }
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") continue;
    const linkDomain = parsed.hostname.replace(/^www\./, "").toLowerCase();
    if (linkDomain !== cleanDomain) continue;
    const path = parsed.pathname;
    if (path === "/" || path === "") continue;
    if (RESOURCE_EXTENSIONS2.test(path)) continue;
    if (SKIP_PATH_PATTERNS2.test(path)) continue;
    const normalized = normalizeUrl2(parsed.href);
    if (normalized === sourceNorm) continue;
    if (seenTargets.has(normalized)) continue;
    seenTargets.add(normalized);
    const anchorText = match[3].replace(/<[^>]*>/g, "").replace(/\s+/g, " ").trim();
    edges.push({ source: sourceNorm, target: normalized, anchorText });
  }
  return edges;
}
3596
/**
 * Assigns each node its click depth from the homepage using a breadth-first
 * walk over the outgoing-link adjacency map. Mutates `node.depth` in place:
 * the homepage gets 0, reachable nodes get their shortest hop count, and
 * everything else keeps Infinity.
 *
 * @param {Map<string, object>} nodes - Graph nodes keyed by normalized URL.
 * @param {Map<string, Set<string>>} adjacency - Outgoing targets per source key.
 * @param {string} homepageUrl - Homepage URL; normalized before lookup.
 * @returns {void}
 */
function calculateDepths(nodes, adjacency, homepageUrl) {
  const rootKey = normalizeUrl2(homepageUrl);
  for (const node of nodes.values()) {
    node.depth = Infinity;
  }
  const root = nodes.get(rootKey);
  if (root) root.depth = 0;
  // Array plus read cursor instead of repeatedly shifting the queue head.
  const frontier = [rootKey];
  const seen = new Set(frontier);
  for (let head = 0; head < frontier.length; head++) {
    const key = frontier[head];
    const node = nodes.get(key);
    const neighbors = adjacency.get(key);
    if (!node || !neighbors) continue;
    for (const next of neighbors) {
      if (seen.has(next)) continue;
      seen.add(next);
      const nextNode = nodes.get(next);
      if (!nextNode) continue;
      nextNode.depth = node.depth + 1;
      frontier.push(next);
    }
  }
}
3625
// Page categories that are eligible to be pillar pages.
var PILLAR_CATEGORIES = new Set(["blog", "content", "resources", "docs"]);
/**
 * Flags pillar pages in place. A node is a pillar when it has at least 1500
 * words, at least 3 inbound and 3 outbound links, an eligible category, and a
 * depth greater than 0 (i.e. it is not the homepage).
 *
 * @param {Map<string, object>} nodes - Graph nodes; `isPillar` is set on each.
 * @returns {void}
 */
function detectPillars(nodes) {
  for (const node of nodes.values()) {
    const isLongForm = node.wordCount >= 1500;
    const isWellLinked = node.inDegree >= 3 && node.outDegree >= 3;
    const isEligible = PILLAR_CATEGORIES.has(node.category);
    const isBelowHome = node.depth > 0;
    node.isPillar = isLongForm && isWellLinked && isEligible && isBelowHome;
  }
}
3631
// Page categories that count as hubs at the lower outbound-link threshold.
var HUB_CATEGORIES = new Set(["homepage", "resources", "docs"]);
/**
 * Flags hub pages in place. A node is a hub when it links out to at least 15
 * pages, or to at least 10 pages while belonging to a hub category.
 *
 * @param {Map<string, object>} nodes - Graph nodes; `isHub` is set on each.
 * @returns {void}
 */
function detectHubs(nodes) {
  for (const node of nodes.values()) {
    const { outDegree, category } = node;
    const isCategoryHub = outDegree >= 10 && HUB_CATEGORIES.has(category);
    node.isHub = isCategoryHub || outDegree >= 15;
  }
}
3637
/**
 * Builds one topic cluster per pillar page. A pillar's spokes are the known
 * nodes it links to or is linked from; pillars with fewer than two spokes are
 * skipped. Cohesion is the percentage of all possible directed links among the
 * cluster members (pillar included) that actually exist.
 *
 * @param {Map<string, object>} nodes - Graph nodes with `isPillar` already set.
 * @param {Array<{ source: string, target: string }>} edges - Directed edges
 *   using normalized URLs.
 * @returns {Array<{ pillarUrl: string, pillarTitle: string, spokes: string[],
 *   cohesion: number }>} Clusters in node iteration order.
 */
function detectClusters(nodes, edges) {
  const directed = new Set(edges.map(({ source, target }) => `${source}->${target}`));
  const clusters = [];
  for (const node of nodes.values()) {
    if (!node.isPillar) continue;
    const pillarKey = normalizeUrl2(node.url);
    const linked = new Set();
    for (const { source, target } of edges) {
      if (source === pillarKey && nodes.has(target)) linked.add(target);
      if (target === pillarKey && nodes.has(source)) linked.add(source);
    }
    linked.delete(pillarKey);
    if (linked.size < 2) continue;
    const spokes = [...linked];
    const members = [pillarKey, ...spokes];
    const possibleEdges = members.length * (members.length - 1);
    let actualEdges = 0;
    for (const from of members) {
      for (const to of members) {
        if (from !== to && directed.has(`${from}->${to}`)) actualEdges++;
      }
    }
    const cohesion = possibleEdges > 0 ? Math.round(actualEdges / possibleEdges * 100) : 0;
    clusters.push({ pillarUrl: node.url, pillarTitle: node.title, spokes, cohesion });
  }
  return clusters;
}
3679
/**
 * Builds the internal link graph for a set of crawled pages: nodes with word
 * counts and link degrees, directed edges between known pages, click depths,
 * pillar/hub/orphan flags, topic clusters and summary statistics.
 *
 * Fix: several crawled pages can share one normalized URL (for example after
 * redirects). Only the first becomes a node, but every copy used to contribute
 * its links again, duplicating edges and inflating `inDegree` and
 * `stats.totalEdges` while `outDegree` stayed deduplicated. Each
 * source -> target pair is now recorded once. `maxDepth` is also computed with
 * a fold so very large graphs cannot exceed the argument limit of a spread call.
 *
 * @param {Array<{ text: string, finalUrl?: string, category?: string }>} pages -
 *   Crawled pages; a page without `finalUrl` is attributed to `https://<domain>`.
 * @param {string} domain - Site domain.
 * @param {string} homepageUrl - Homepage URL; depth 0 and never an orphan.
 * @returns {{ nodes: Map<string, object>, edges: Array, stats: object, clusters: Array }}
 */
function buildLinkGraph(pages, domain, homepageUrl) {
  const nodes = new Map();
  const allEdges = [];
  const adjacency = new Map();
  const inDegreeMap = new Map();
  // Pass 1: one node per distinct normalized URL (first page wins).
  for (const page of pages) {
    const url = page.finalUrl || `https://${domain}`;
    const norm = normalizeUrl2(url);
    if (nodes.has(norm)) continue;
    nodes.set(norm, {
      url: norm,
      title: extractTitle2(page.text),
      wordCount: countWords2(getTextContent3(page.text)),
      category: page.category || "content",
      inDegree: 0,
      outDegree: 0,
      depth: Infinity,
      isPillar: false,
      isHub: false,
      isOrphan: false
    });
  }
  // Pass 2: edges between known nodes, each source -> target pair counted once.
  for (const page of pages) {
    const url = page.finalUrl || `https://${domain}`;
    const sourceNorm = normalizeUrl2(url);
    for (const edge of extractLinksWithAnchors(page.text, url, domain)) {
      const targetNorm = normalizeUrl2(edge.target);
      if (!nodes.has(targetNorm)) continue;
      let targets = adjacency.get(sourceNorm);
      if (!targets) {
        targets = new Set();
        adjacency.set(sourceNorm, targets);
      }
      if (targets.has(targetNorm)) continue;
      targets.add(targetNorm);
      allEdges.push({ source: sourceNorm, target: targetNorm, anchorText: edge.anchorText });
      inDegreeMap.set(targetNorm, (inDegreeMap.get(targetNorm) || 0) + 1);
    }
  }
  for (const [url, node] of nodes) {
    node.inDegree = inDegreeMap.get(url) || 0;
    node.outDegree = adjacency.get(url)?.size || 0;
  }
  calculateDepths(nodes, adjacency, homepageUrl);
  detectPillars(nodes);
  detectHubs(nodes);
  const homeNorm = normalizeUrl2(homepageUrl);
  for (const [url, node] of nodes) {
    node.isOrphan = node.inDegree === 0 && url !== homeNorm;
  }
  const clusters = detectClusters(nodes, allEdges);
  const nodeList = Array.from(nodes.values());
  // Unreachable nodes keep depth Infinity and are excluded from depth stats.
  const depthValues = nodeList.map((n) => n.depth).filter((d) => d !== Infinity);
  const avgDepth = depthValues.length > 0
    ? Math.round(depthValues.reduce((sum, d) => sum + d, 0) / depthValues.length * 10) / 10
    : 0;
  const maxDepth = depthValues.reduce((max, d) => (d > max ? d : max), 0);
  const stats = {
    totalPages: nodes.size,
    totalEdges: allEdges.length,
    orphanPages: nodeList.filter((n) => n.isOrphan).length,
    pillarPages: nodeList.filter((n) => n.isPillar).length,
    hubPages: nodeList.filter((n) => n.isHub).length,
    avgDepth,
    maxDepth,
    clusters: clusters.length
  };
  return { nodes, edges: allEdges, stats, clusters };
}
3750
+
3751
+ // src/fix-engine.ts
3752
// Weight per audit criterion id for the fix engine (26 criteria).
// The values for the 14 page-level criteria equal the weights in PAGE_CRITERIA.
// NOTE(review): the consumer of this table is outside the visible section —
// presumably used to rank fixes by impact; confirm against the caller.
var CRITERION_WEIGHTS2 = {
  llms_txt: 0.1,
  schema_markup: 0.15,
  qa_content_format: 0.15,
  clean_html: 0.1,
  entity_consistency: 0.1,
  robots_txt: 0.05,
  faq_section: 0.1,
  original_data: 0.1,
  internal_linking: 0.1,
  semantic_html: 0.05,
  content_freshness: 0.07,
  sitemap_completeness: 0.05,
  rss_feed: 0.03,
  table_list_extractability: 0.07,
  definition_patterns: 0.04,
  direct_answer_density: 0.07,
  content_licensing: 0.04,
  author_schema_depth: 0.04,
  fact_density: 0.05,
  canonical_url: 0.04,
  content_velocity: 0.03,
  schema_coverage: 0.03,
  speakable_schema: 0.03,
  query_answer_alignment: 0.08,
  content_cannibalization: 0.05,
  visible_date_signal: 0.04
};
3780
// Ordered remediation phases. Each entry carries a phase number, a title, a
// description and the criterion ids that belong to it. Every key of
// CRITERION_WEIGHTS2 appears in exactly one phase.
var PHASE_CONFIG = [
  {
    phase: 1,
    title: "Foundation",
    description: "Discovery and structural fixes that enable AI crawlers to access and parse your content.",
    criteria: ["robots_txt", "llms_txt", "canonical_url", "clean_html", "sitemap_completeness"]
  },
  {
    phase: 2,
    title: "Content",
    description: "Content quality and format improvements that make your pages citable by AI engines.",
    criteria: [
      "qa_content_format",
      "faq_section",
      "original_data",
      "definition_patterns",
      "direct_answer_density",
      "fact_density",
      "content_freshness",
      "table_list_extractability",
      "query_answer_alignment",
      "visible_date_signal"
    ]
  },
  {
    phase: 3,
    title: "Authority",
    description: "Trust signals, schema depth, and semantic structure that establish credibility with AI engines.",
    criteria: [
      "schema_markup",
      "schema_coverage",
      "speakable_schema",
      "author_schema_depth",
      "content_licensing",
      "entity_consistency",
      "semantic_html"
    ]
  },
  {
    phase: 4,
    title: "Architecture",
    description: "Site architecture, linking patterns, and publishing cadence that support long-term AI visibility.",
    criteria: ["internal_linking", "content_velocity", "content_cannibalization", "rss_feed"]
  }
];
3825
/**
 * Maps a 0-10 criterion score to an impact level: the lower the score, the
 * more impactful fixing it is.
 *
 * @param {number} score - Criterion score.
 * @returns {"critical"|"high"|"medium"|"low"} critical for <= 3, high for <= 5,
 *   medium for <= 7, otherwise low.
 */
function impactFromScore(score) {
  const bands = [
    [3, "critical"],
    [5, "high"],
    [7, "medium"]
  ];
  const band = bands.find(([upperBound]) => score <= upperBound);
  return band ? band[1] : "low";
}
3831
/**
 * Estimates the effort of fixing a criterion from its difficulty tier and
 * current score. Each tier has a score cutoff: at or below it the harder level
 * applies, above it the easier one. Criteria not listed in any tier use the
 * default tier (medium at or below 3, otherwise low).
 *
 * @param {string} criterion - Criterion id.
 * @param {number} score - Current 0-10 score of the criterion.
 * @returns {"trivial"|"low"|"medium"|"high"} Effort level.
 */
function effortForCriterion(criterion, score) {
  const tiers = [
    {
      ids: ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"],
      cutoff: 3,
      harder: "low",
      easier: "trivial"
    },
    {
      ids: ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"],
      cutoff: 3,
      harder: "medium",
      easier: "low"
    },
    {
      ids: ["original_data", "content_velocity", "content_cannibalization"],
      cutoff: 5,
      harder: "high",
      easier: "medium"
    }
  ];
  const fallback = { cutoff: 3, harder: "medium", easier: "low" };
  const tier = tiers.find(({ ids }) => ids.includes(criterion)) || fallback;
  return score <= tier.cutoff ? tier.harder : tier.easier;
}
3840
/**
 * Lists the URLs of pages whose score for a criterion is below a threshold.
 * Pages without a score entry for the criterion are not considered affected.
 *
 * @param {string} criterion - Criterion id to look up in each page's `criterionScores`.
 * @param {Array<{ url: string, criterionScores?: Array<{ criterion: string, score: number }> }>} [pages]
 * @param {number} [threshold=7] - Scores strictly below this count as affected.
 * @returns {string[]|undefined} Affected URLs, or undefined when there are no
 *   pages or none is affected.
 */
function getAffectedPages(criterion, pages, threshold = 7) {
  if (!pages || pages.length === 0) return undefined;
  const urls = [];
  for (const page of pages) {
    const entry = page.criterionScores?.find((c) => c.criterion === criterion);
    if (entry && entry.score < threshold) urls.push(page.url);
  }
  return urls.length === 0 ? undefined : urls;
}
3849
/**
 * Converts an effort level into an hour estimate.
 *
 * @param {"trivial"|"low"|"medium"|"high"} effort - Effort level.
 * @returns {number|undefined} 0.5, 1, 3 or 8 hours; undefined for any other value.
 */
function effortToHours(effort) {
  // A Map (not an object literal) so that only the four listed levels resolve.
  const hoursByEffort = new Map([
    ["trivial", 0.5],
    ["low", 1],
    ["medium", 3],
    ["high", 8]
  ]);
  return hoursByEffort.get(effort);
}
3861
+ var FIX_GENERATORS = {
3862
+ llms_txt: (c) => {
3863
+ if (c.score >= 10) return [];
3864
+ const impact = impactFromScore(c.score);
3865
+ const effort = effortForCriterion("llms_txt", c.score);
3866
+ const fixes = [];
3867
+ if (c.score <= 6) {
3868
+ fixes.push({
3869
+ id: "fix-llms-txt-create",
3870
+ criterion: c.criterion_label,
3871
+ criterionId: c.criterion,
3872
+ title: "Create /llms.txt file",
3873
+ description: "Add a machine-readable llms.txt file at your domain root that describes your site, services, and key pages for AI engines.",
3874
+ impact,
3875
+ effort,
3876
+ impactScore: 0,
3877
+ // calculated later
3878
+ category: "discovery",
3879
+ steps: [
3880
+ "Create a file named llms.txt in your site root",
3881
+ "Add site name, description, and core URLs in markdown format",
3882
+ "Include key service/product pages and their descriptions",
3883
+ "Deploy and verify access at yourdomain.com/llms.txt"
3884
+ ],
3885
+ codeExample: `# Site Name
3886
+ > One-line site description
3887
+
3888
+ ## Core Pages
3889
+ - [About](/about): Company overview
3890
+ - [Services](/services): Service offerings
3891
+ - [Blog](/blog): Latest articles
3892
+
3893
+ ## Key Topics
3894
+ - Topic 1
3895
+ - Topic 2`,
3896
+ successCriteria: "/llms.txt returns 200 with valid markdown content"
3897
+ });
3898
+ }
3899
+ if (c.score <= 3) {
3900
+ fixes.push({
3901
+ id: "fix-llms-txt-full",
3902
+ criterion: c.criterion_label,
3903
+ criterionId: c.criterion,
3904
+ title: "Add llms-full.txt with extended content",
3905
+ description: "Create a comprehensive llms-full.txt with detailed page descriptions, content summaries, and topic taxonomy.",
3906
+ impact: "medium",
3907
+ effort: "low",
3908
+ impactScore: 0,
3909
+ category: "discovery",
3910
+ steps: [
3911
+ "Create llms-full.txt alongside llms.txt",
3912
+ "Include full page descriptions with word counts",
3913
+ "Add topic categories and content clusters",
3914
+ "Link from llms.txt to llms-full.txt"
3915
+ ],
3916
+ successCriteria: "/llms-full.txt returns 200 with comprehensive site map"
3917
+ });
3918
+ }
3919
+ return fixes;
3920
+ },
3921
+ schema_markup: (c, pages) => {
3922
+ if (c.score >= 10) return [];
3923
+ const impact = impactFromScore(c.score);
3924
+ const effort = effortForCriterion("schema_markup", c.score);
3925
+ const affected = getAffectedPages("schema_markup", pages);
3926
+ const fixes = [{
3927
+ id: "fix-schema-markup",
3928
+ criterion: c.criterion_label,
3929
+ criterionId: c.criterion,
3930
+ title: "Add JSON-LD structured data",
3931
+ description: "Implement Organization, WebSite, and page-specific schema.org JSON-LD to help AI engines extract your content.",
3932
+ impact,
3933
+ effort,
3934
+ impactScore: 0,
3935
+ category: "trust",
3936
+ steps: [
3937
+ "Add Organization JSON-LD to your homepage with name, url, logo, sameAs",
3938
+ "Add WebSite schema with SearchAction",
3939
+ "Add page-specific schema (Article, Service, Product, FAQPage) to relevant pages",
3940
+ "Validate with Google Rich Results Test"
3941
+ ],
3942
+ codeExample: `<script type="application/ld+json">
3943
+ {
3944
+ "@context": "https://schema.org",
3945
+ "@type": "Organization",
3946
+ "name": "Your Company",
3947
+ "url": "https://example.com",
3948
+ "logo": "https://example.com/logo.png",
3949
+ "sameAs": [
3950
+ "https://twitter.com/company",
3951
+ "https://linkedin.com/company/company"
3952
+ ]
3953
+ }
3954
+ </script>`,
3955
+ successCriteria: "Homepage and key pages have valid JSON-LD schema",
3956
+ dependsOn: ["fix-clean-html-structure"],
3957
+ affectedPages: affected,
3958
+ pageCount: affected?.length
3959
+ }];
3960
+ return fixes;
3961
+ },
3962
+ qa_content_format: (c, pages) => {
3963
+ if (c.score >= 10) return [];
3964
+ const impact = impactFromScore(c.score);
3965
+ const effort = effortForCriterion("qa_content_format", c.score);
3966
+ const affected = getAffectedPages("qa_content_format", pages);
3967
+ return [{
3968
+ id: "fix-qa-format",
3969
+ criterion: c.criterion_label,
3970
+ criterionId: c.criterion,
3971
+ title: "Add question-based headings",
3972
+ description: "Restructure content with H2/H3 question headings that match how users query AI assistants.",
3973
+ impact,
3974
+ effort,
3975
+ impactScore: 0,
3976
+ category: "content",
3977
+ steps: [
3978
+ "Identify top user questions for each page topic",
3979
+ "Convert section headings to question format (What, How, Why, When)",
3980
+ "Follow each question heading with a direct 2-3 sentence answer",
3981
+ "Add a summary answer box at the top of long-form content"
3982
+ ],
3983
+ successCriteria: "At least 50% of H2/H3 headings use question format",
3984
+ affectedPages: affected,
3985
+ pageCount: affected?.length
3986
+ }];
3987
+ },
3988
+ clean_html: (c, pages) => {
3989
+ if (c.score >= 10) return [];
3990
+ const impact = impactFromScore(c.score);
3991
+ const effort = effortForCriterion("clean_html", c.score);
3992
+ const affected = getAffectedPages("clean_html", pages);
3993
+ const fixes = [{
3994
+ id: "fix-clean-html-structure",
3995
+ criterion: c.criterion_label,
3996
+ criterionId: c.criterion,
3997
+ title: "Fix HTML structure and meta tags",
3998
+ description: "Ensure clean, well-structured HTML with proper meta tags, HTTPS, and parseable content for AI crawlers.",
3999
+ impact,
4000
+ effort,
4001
+ impactScore: 0,
4002
+ category: "structure",
4003
+ steps: [
4004
+ "Enable HTTPS and redirect HTTP to HTTPS",
4005
+ "Add proper <title>, meta description, and viewport meta tags",
4006
+ "Fix HTML validation errors (unclosed tags, invalid nesting)",
4007
+ "Ensure content is server-rendered (not client-side only)"
4008
+ ],
4009
+ successCriteria: "Pages pass HTML validation with proper meta tags and HTTPS",
4010
+ affectedPages: affected,
4011
+ pageCount: affected?.length
4012
+ }];
4013
+ return fixes;
4014
+ },
4015
+ entity_consistency: (c) => {
4016
+ if (c.score >= 10) return [];
4017
+ const impact = impactFromScore(c.score);
4018
+ const effort = effortForCriterion("entity_consistency", c.score);
4019
+ return [{
4020
+ id: "fix-entity-consistency",
4021
+ criterion: c.criterion_label,
4022
+ criterionId: c.criterion,
4023
+ title: "Strengthen entity authority (NAP)",
4024
+ description: "Add consistent name, address, phone (NAP) and sameAs links across all pages to strengthen entity recognition.",
4025
+ impact,
4026
+ effort,
4027
+ impactScore: 0,
4028
+ category: "trust",
4029
+ steps: [
4030
+ "Ensure company name is consistent across all pages",
4031
+ "Add Organization schema with full NAP details",
4032
+ "Include sameAs links to social profiles and directories",
4033
+ "Add logo and brand marks consistently"
4034
+ ],
4035
+ successCriteria: "Organization schema present with consistent NAP on all pages"
4036
+ }];
4037
+ },
4038
+ robots_txt: (c) => {
4039
+ if (c.score >= 10) return [];
4040
+ const impact = impactFromScore(c.score);
4041
+ const effort = effortForCriterion("robots_txt", c.score);
4042
+ return [{
4043
+ id: "fix-robots-txt",
4044
+ criterion: c.criterion_label,
4045
+ criterionId: c.criterion,
4046
+ title: "Configure robots.txt for AI crawlers",
4047
+ description: "Update robots.txt to explicitly allow AI crawlers and include sitemap directive.",
4048
+ impact,
4049
+ effort,
4050
+ impactScore: 0,
4051
+ category: "discovery",
4052
+ steps: [
4053
+ "Create or update robots.txt at domain root",
4054
+ "Add User-agent rules for GPTBot, ClaudeBot, PerplexityBot",
4055
+ "Include Sitemap directive pointing to sitemap.xml",
4056
+ "Verify no accidental Disallow rules blocking content pages"
4057
+ ],
4058
+ codeExample: `User-agent: *
4059
+ Allow: /
4060
+
4061
+ User-agent: GPTBot
4062
+ Allow: /
4063
+
4064
+ User-agent: ClaudeBot
4065
+ Allow: /
4066
+
4067
+ User-agent: PerplexityBot
4068
+ Allow: /
4069
+
4070
+ Sitemap: https://example.com/sitemap.xml`,
4071
+ successCriteria: "robots.txt returns 200 with AI crawler directives and Sitemap"
4072
+ }];
4073
+ },
4074
+ faq_section: (c, pages) => {
4075
+ if (c.score >= 10) return [];
4076
+ const impact = impactFromScore(c.score);
4077
+ const effort = effortForCriterion("faq_section", c.score);
4078
+ const affected = getAffectedPages("faq_section", pages);
4079
+ return [{
4080
+ id: "fix-faq-section",
4081
+ criterion: c.criterion_label,
4082
+ criterionId: c.criterion,
4083
+ title: "Build FAQ sections with schema",
4084
+ description: "Create FAQ content with FAQPage schema markup on key pages to become a direct answer source for AI engines.",
4085
+ impact,
4086
+ effort,
4087
+ impactScore: 0,
4088
+ category: "content",
4089
+ steps: [
4090
+ "Identify 8-10 most common customer questions per service area",
4091
+ "Create dedicated FAQ page with categorized Q&A pairs",
4092
+ "Add inline FAQ sections to key service/product pages",
4093
+ "Implement FAQPage JSON-LD schema on all FAQ content"
4094
+ ],
4095
+ successCriteria: "FAQ page exists with FAQPage schema, key pages have inline FAQ sections",
4096
+ affectedPages: affected,
4097
+ pageCount: affected?.length
4098
+ }];
4099
+ },
4100
+ original_data: (c, pages) => {
4101
+ if (c.score >= 10) return [];
4102
+ const impact = impactFromScore(c.score);
4103
+ const effort = effortForCriterion("original_data", c.score);
4104
+ const affected = getAffectedPages("original_data", pages);
4105
+ return [{
4106
+ id: "fix-original-data",
4107
+ criterion: c.criterion_label,
4108
+ criterionId: c.criterion,
4109
+ title: "Add original data and case studies",
4110
+ description: "Publish proprietary data, statistics, case studies, or research that AI engines cannot find elsewhere.",
4111
+ impact,
4112
+ effort,
4113
+ impactScore: 0,
4114
+ category: "content",
4115
+ steps: [
4116
+ "Identify internal data assets (customer metrics, case study results, survey data)",
4117
+ "Create data-driven content with specific numbers and percentages",
4118
+ "Publish case studies with measurable outcomes",
4119
+ "Add comparison tables with proprietary benchmarks"
4120
+ ],
4121
+ successCriteria: "At least 3 pages contain original data points not found elsewhere online",
4122
+ affectedPages: affected,
4123
+ pageCount: affected?.length
4124
+ }];
4125
+ },
4126
+ internal_linking: (c, pages, linkGraph) => {
4127
+ if (c.score >= 10) return [];
4128
+ const impact = impactFromScore(c.score);
4129
+ const effort = effortForCriterion("internal_linking", c.score);
4130
+ const fixes = [];
4131
+ if (linkGraph) {
4132
+ const orphans = [];
4133
+ linkGraph.nodes.forEach((node) => {
4134
+ if (node.isOrphan) orphans.push(node.url);
4135
+ });
4136
+ if (orphans.length > 0) {
4137
+ fixes.push({
4138
+ id: "fix-internal-linking-orphans",
4139
+ criterion: c.criterion_label,
4140
+ criterionId: c.criterion,
4141
+ title: "Link orphan pages into site navigation",
4142
+ description: `${orphans.length} pages have no incoming internal links. These are invisible to AI crawlers that follow links.`,
4143
+ impact: orphans.length > 5 ? "critical" : "high",
4144
+ effort: orphans.length > 10 ? "medium" : "low",
4145
+ impactScore: 0,
4146
+ category: "structure",
4147
+ steps: [
4148
+ `Identify the ${orphans.length} orphan pages with zero incoming links`,
4149
+ "Add contextual links from related content pages",
4150
+ "Include orphan pages in navigation menus or footer links",
4151
+ 'Add "Related Content" sections on relevant pages'
4152
+ ],
4153
+ successCriteria: "All content pages have at least 1 incoming internal link",
4154
+ affectedPages: orphans.slice(0, 20),
4155
+ pageCount: orphans.length
4156
+ });
4157
+ }
4158
+ if (linkGraph.stats.maxDepth > 3) {
4159
+ fixes.push({
4160
+ id: "fix-internal-linking-depth",
4161
+ criterion: c.criterion_label,
4162
+ criterionId: c.criterion,
4163
+ title: "Reduce page depth for deep content",
4164
+ description: `Max depth is ${linkGraph.stats.maxDepth} clicks from homepage. AI crawlers rarely follow links beyond 3 levels.`,
4165
+ impact: "medium",
4166
+ effort: "medium",
4167
+ impactScore: 0,
4168
+ category: "structure",
4169
+ steps: [
4170
+ "Identify pages more than 3 clicks from the homepage",
4171
+ "Add direct links from high-level pages to deep content",
4172
+ "Consider flattening URL structure for key pages",
4173
+ "Add hub pages that aggregate related deep content"
4174
+ ],
4175
+ successCriteria: "All important content pages reachable within 3 clicks from homepage"
4176
+ });
4177
+ }
4178
+ if (linkGraph.clusters.length === 0) {
4179
+ fixes.push({
4180
+ id: "fix-internal-linking-clusters",
4181
+ criterion: c.criterion_label,
4182
+ criterionId: c.criterion,
4183
+ title: "Create topic clusters with pillar pages",
4184
+ description: "No topic clusters detected. Organizing content into pillar-spoke clusters strengthens topical authority for AI engines.",
4185
+ impact: "high",
4186
+ effort: "high",
4187
+ impactScore: 0,
4188
+ category: "structure",
4189
+ steps: [
4190
+ "Identify 3-5 core topic areas for your business",
4191
+ "Create comprehensive pillar pages (3000+ words) for each topic",
4192
+ "Write 5-7 supporting articles per pillar linking back to pillar",
4193
+ "Interlink supporting articles within each cluster"
4194
+ ],
4195
+ successCriteria: "At least 2 topic clusters with pillar page and 5+ spoke pages"
4196
+ });
4197
+ }
4198
+ } else {
4199
+ fixes.push({
4200
+ id: "fix-internal-linking-generic",
4201
+ criterion: c.criterion_label,
4202
+ criterionId: c.criterion,
4203
+ title: "Improve internal linking architecture",
4204
+ description: "Strengthen internal linking with descriptive anchor text between related pages.",
4205
+ impact,
4206
+ effort,
4207
+ impactScore: 0,
4208
+ category: "structure",
4209
+ steps: [
4210
+ "Audit current internal link structure",
4211
+ "Add contextual links between related content pages",
4212
+ "Ensure every key page is reachable within 3 clicks from homepage",
4213
+ 'Use descriptive anchor text instead of "click here" or "read more"'
4214
+ ],
4215
+ successCriteria: "Key pages have 3+ incoming internal links with descriptive anchors"
4216
+ });
4217
+ }
4218
+ return fixes;
4219
+ },
4220
+ semantic_html: (c, pages) => {
4221
+ if (c.score >= 10) return [];
4222
+ const impact = impactFromScore(c.score);
4223
+ const effort = effortForCriterion("semantic_html", c.score);
4224
+ const affected = getAffectedPages("semantic_html", pages);
4225
+ return [{
4226
+ id: "fix-semantic-html",
4227
+ criterion: c.criterion_label,
4228
+ criterionId: c.criterion,
4229
+ title: "Implement semantic HTML5 elements",
4230
+ description: "Use semantic HTML5 elements (main, article, nav, header, footer, section) to give AI parsers clear content structure.",
4231
+ impact,
4232
+ effort,
4233
+ impactScore: 0,
4234
+ category: "structure",
4235
+ steps: [
4236
+ "Wrap main content in <main> element",
4237
+ "Use <article> for self-contained content blocks",
4238
+ "Add <nav> for navigation and <aside> for sidebars",
4239
+ "Add lang attribute to <html> and ARIA labels for accessibility"
4240
+ ],
4241
+ successCriteria: "Pages use semantic HTML5 elements with lang attribute",
4242
+ affectedPages: affected,
4243
+ pageCount: affected?.length
4244
+ }];
4245
+ },
4246
+ content_freshness: (c, pages) => {
4247
+ if (c.score >= 10) return [];
4248
+ const impact = impactFromScore(c.score);
4249
+ const effort = effortForCriterion("content_freshness", c.score);
4250
+ const affected = getAffectedPages("content_freshness", pages);
4251
+ return [{
4252
+ id: "fix-content-freshness",
4253
+ criterion: c.criterion_label,
4254
+ criterionId: c.criterion,
4255
+ title: "Add content freshness signals",
4256
+ description: "Include dateModified schema, visible dates, and recent content updates to signal freshness to AI engines.",
4257
+ impact,
4258
+ effort,
4259
+ impactScore: 0,
4260
+ category: "content",
4261
+ steps: [
4262
+ "Add datePublished and dateModified to Article schema",
4263
+ 'Display visible "Last updated" dates on content pages',
4264
+ "Update stale content with current information",
4265
+ "Add <time> elements with datetime attributes for all dates"
4266
+ ],
4267
+ successCriteria: "Content pages show visible dates and have dateModified in schema",
4268
+ affectedPages: affected,
4269
+ pageCount: affected?.length
4270
+ }];
4271
+ },
4272
+ sitemap_completeness: (c) => {
4273
+ if (c.score >= 10) return [];
4274
+ const impact = impactFromScore(c.score);
4275
+ const effort = effortForCriterion("sitemap_completeness", c.score);
4276
+ return [{
4277
+ id: "fix-sitemap",
4278
+ criterion: c.criterion_label,
4279
+ criterionId: c.criterion,
4280
+ title: "Create complete sitemap.xml",
4281
+ description: "Generate a comprehensive sitemap with lastmod dates for all important pages.",
4282
+ impact,
4283
+ effort,
4284
+ impactScore: 0,
4285
+ category: "discovery",
4286
+ steps: [
4287
+ "Generate sitemap.xml listing all content pages",
4288
+ "Include <lastmod> dates for each URL",
4289
+ "Set <changefreq> and <priority> appropriately",
4290
+ "Reference sitemap in robots.txt"
4291
+ ],
4292
+ successCriteria: "sitemap.xml returns 200 with all content pages and lastmod dates"
4293
+ }];
4294
+ },
4295
+ rss_feed: (c) => {
4296
+ if (c.score >= 10) return [];
4297
+ const impact = impactFromScore(c.score);
4298
+ const effort = effortForCriterion("rss_feed", c.score);
4299
+ return [{
4300
+ id: "fix-rss-feed",
4301
+ criterion: c.criterion_label,
4302
+ criterionId: c.criterion,
4303
+ title: "Deploy RSS/Atom feed",
4304
+ description: "Add an RSS or Atom feed for your blog/news content to signal active publishing to AI engines.",
4305
+ impact,
4306
+ effort,
4307
+ impactScore: 0,
4308
+ category: "discovery",
4309
+ steps: [
4310
+ "Create RSS 2.0 or Atom feed for blog/news content",
4311
+ "Include title, description, pubDate, and full content for each item",
4312
+ 'Add <link rel="alternate" type="application/rss+xml"> to page head',
4313
+ "Auto-generate feed on each new publish"
4314
+ ],
4315
+ codeExample: `<?xml version="1.0" encoding="UTF-8"?>
4316
+ <rss version="2.0">
4317
+ <channel>
4318
+ <title>Your Site Blog</title>
4319
+ <link>https://example.com/blog</link>
4320
+ <description>Latest articles</description>
4321
+ <item>
4322
+ <title>Article Title</title>
4323
+ <link>https://example.com/blog/article</link>
4324
+ <pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
4325
+ <description>Article summary</description>
4326
+ </item>
4327
+ </channel>
4328
+ </rss>`,
4329
+ successCriteria: "RSS feed returns valid XML with recent content items"
4330
+ }];
4331
+ },
4332
+ table_list_extractability: (c, pages) => {
4333
+ if (c.score >= 10) return [];
4334
+ const impact = impactFromScore(c.score);
4335
+ const effort = effortForCriterion("table_list_extractability", c.score);
4336
+ const affected = getAffectedPages("table_list_extractability", pages);
4337
+ return [{
4338
+ id: "fix-tables-lists",
4339
+ criterion: c.criterion_label,
4340
+ criterionId: c.criterion,
4341
+ title: "Add structured tables and lists",
4342
+ description: "Use HTML tables for comparison data and lists for features, steps, and specifications.",
4343
+ impact,
4344
+ effort,
4345
+ impactScore: 0,
4346
+ category: "content",
4347
+ steps: [
4348
+ "Identify data suitable for table format (comparisons, pricing, specs)",
4349
+ "Convert bullet points to proper <ul>/<ol> lists",
4350
+ "Add comparison tables with <th> headers",
4351
+ "Ensure tables have descriptive captions"
4352
+ ],
4353
+ successCriteria: "Key pages contain at least one HTML table or structured list",
4354
+ affectedPages: affected,
4355
+ pageCount: affected?.length
4356
+ }];
4357
+ },
4358
+ definition_patterns: (c, pages) => {
4359
+ if (c.score >= 10) return [];
4360
+ const impact = impactFromScore(c.score);
4361
+ const effort = effortForCriterion("definition_patterns", c.score);
4362
+ const affected = getAffectedPages("definition_patterns", pages);
4363
+ return [{
4364
+ id: "fix-definitions",
4365
+ criterion: c.criterion_label,
4366
+ criterionId: c.criterion,
4367
+ title: "Add definition-style content",
4368
+ description: 'Include clear definition patterns for key terms and concepts that AI engines can cite for "what is" queries.',
4369
+ impact,
4370
+ effort,
4371
+ impactScore: 0,
4372
+ category: "content",
4373
+ steps: [
4374
+ "Identify key industry terms your audience searches for",
4375
+ 'Write clear definitions using "X is..." or "X refers to..." patterns',
4376
+ "Place definitions near the top of relevant pages",
4377
+ "Consider a glossary page for comprehensive term coverage"
4378
+ ],
4379
+ successCriteria: "Key pages contain definition patterns for relevant terms",
4380
+ affectedPages: affected,
4381
+ pageCount: affected?.length
4382
+ }];
4383
+ },
4384
+ direct_answer_density: (c, pages) => {
4385
+ if (c.score >= 10) return [];
4386
+ const impact = impactFromScore(c.score);
4387
+ const effort = effortForCriterion("direct_answer_density", c.score);
4388
+ const affected = getAffectedPages("direct_answer_density", pages);
4389
+ return [{
4390
+ id: "fix-direct-answers",
4391
+ criterion: c.criterion_label,
4392
+ criterionId: c.criterion,
4393
+ title: "Add direct answer paragraphs",
4394
+ description: "Write concise, standalone answer paragraphs after question headings for AI engine citations.",
4395
+ impact,
4396
+ effort,
4397
+ impactScore: 0,
4398
+ category: "content",
4399
+ steps: [
4400
+ "Identify question-format headings on each page",
4401
+ "Write a 2-3 sentence direct answer immediately after each heading",
4402
+ "Ensure answers are self-contained (don't require context from other sections)",
4403
+ "Use bold for key facts within answer paragraphs"
4404
+ ],
4405
+ successCriteria: "Question headings are followed by direct, concise answer paragraphs",
4406
+ affectedPages: affected,
4407
+ pageCount: affected?.length
4408
+ }];
4409
+ },
4410
+ content_licensing: (c) => {
4411
+ if (c.score >= 10) return [];
4412
+ const impact = impactFromScore(c.score);
4413
+ const effort = effortForCriterion("content_licensing", c.score);
4414
+ return [{
4415
+ id: "fix-content-licensing",
4416
+ criterion: c.criterion_label,
4417
+ criterionId: c.criterion,
4418
+ title: "Add ai.txt and content licensing",
4419
+ description: "Create an /ai.txt file specifying AI usage permissions and add license schema to structured data.",
4420
+ impact,
4421
+ effort,
4422
+ impactScore: 0,
4423
+ category: "trust",
4424
+ steps: [
4425
+ "Create ai.txt at domain root with usage permissions",
4426
+ "Specify allowed AI uses (training, citation, summarization)",
4427
+ "Add license information to schema markup",
4428
+ "Consider a content licensing page linked from footer"
4429
+ ],
4430
+ codeExample: `# ai.txt - AI Usage Policy for example.com
4431
+
4432
+ User-Agent: *
4433
+ Allow: /blog/
4434
+ Allow: /docs/
4435
+
4436
+ # Permissions
4437
+ Training: yes
4438
+ Citation: yes with attribution
4439
+ Summarization: yes`,
4440
+ successCriteria: "/ai.txt returns 200 with clear AI usage permissions"
4441
+ }];
4442
+ },
4443
+ author_schema_depth: (c) => {
4444
+ if (c.score >= 10) return [];
4445
+ const impact = impactFromScore(c.score);
4446
+ const effort = effortForCriterion("author_schema_depth", c.score);
4447
+ return [{
4448
+ id: "fix-author-schema",
4449
+ criterion: c.criterion_label,
4450
+ criterionId: c.criterion,
4451
+ title: "Enhance author and expert schema",
4452
+ description: "Add Person schema for content authors with credentials and sameAs links for E-E-A-T signals.",
4453
+ impact,
4454
+ effort,
4455
+ impactScore: 0,
4456
+ category: "trust",
4457
+ steps: [
4458
+ "Create author profile pages for content creators",
4459
+ "Add Person schema with name, jobTitle, credentials, sameAs",
4460
+ "Link articles to author profiles via schema author property",
4461
+ "Include author bio and expertise on article pages"
4462
+ ],
4463
+ successCriteria: "Articles have Person schema for authors with credentials"
4464
+ }];
4465
+ },
4466
+ fact_density: (c, pages) => {
4467
+ if (c.score >= 10) return [];
4468
+ const impact = impactFromScore(c.score);
4469
+ const effort = effortForCriterion("fact_density", c.score);
4470
+ const affected = getAffectedPages("fact_density", pages);
4471
+ return [{
4472
+ id: "fix-fact-density",
4473
+ criterion: c.criterion_label,
4474
+ criterionId: c.criterion,
4475
+ title: "Increase fact and data density",
4476
+ description: "Add specific numbers, percentages, statistics, and data points that AI engines can cite.",
4477
+ impact,
4478
+ effort,
4479
+ impactScore: 0,
4480
+ category: "content",
4481
+ steps: [
4482
+ "Review content for vague claims and replace with specific data",
4483
+ "Add statistics, percentages, and measurable outcomes",
4484
+ "Include source citations for data points",
4485
+ "Add data tables or comparison charts where appropriate"
4486
+ ],
4487
+ successCriteria: "Key pages contain at least 3 specific data points per 500 words",
4488
+ affectedPages: affected,
4489
+ pageCount: affected?.length
4490
+ }];
4491
+ },
4492
+ canonical_url: (c, pages) => {
4493
+ if (c.score >= 10) return [];
4494
+ const impact = impactFromScore(c.score);
4495
+ const effort = effortForCriterion("canonical_url", c.score);
4496
+ const affected = getAffectedPages("canonical_url", pages);
4497
+ return [{
4498
+ id: "fix-canonical-url",
4499
+ criterion: c.criterion_label,
4500
+ criterionId: c.criterion,
4501
+ title: "Fix canonical URL strategy",
4502
+ description: 'Add rel="canonical" tags to all pages to prevent duplicate content confusion.',
4503
+ impact,
4504
+ effort,
4505
+ impactScore: 0,
4506
+ category: "structure",
4507
+ steps: [
4508
+ 'Add <link rel="canonical"> to every page pointing to preferred URL',
4509
+ "Ensure canonical URLs use consistent scheme (https) and format",
4510
+ "Handle www vs non-www with proper redirects",
4511
+ "Set canonical for paginated content to the main page"
4512
+ ],
4513
+ codeExample: `<link rel="canonical" href="https://example.com/page" />`,
4514
+ successCriteria: 'All pages have rel="canonical" pointing to the correct URL',
4515
+ affectedPages: affected,
4516
+ pageCount: affected?.length
4517
+ }];
4518
+ },
4519
+ content_velocity: (c) => {
4520
+ if (c.score >= 10) return [];
4521
+ const impact = impactFromScore(c.score);
4522
+ const effort = effortForCriterion("content_velocity", c.score);
4523
+ return [{
4524
+ id: "fix-content-velocity",
4525
+ criterion: c.criterion_label,
4526
+ criterionId: c.criterion,
4527
+ title: "Increase publishing frequency",
4528
+ description: "Establish a regular content publishing cadence to signal active, current information to AI engines.",
4529
+ impact,
4530
+ effort,
4531
+ impactScore: 0,
4532
+ category: "content",
4533
+ steps: [
4534
+ "Set a publishing schedule (weekly or bi-weekly minimum)",
4535
+ "Create a content calendar covering key topics",
4536
+ "Update sitemap and RSS feed with each new publish",
4537
+ "Refresh existing evergreen content with current data"
4538
+ ],
4539
+ successCriteria: "At least 2 new or updated content pages per month with dated entries"
4540
+ }];
4541
+ },
4542
+ schema_coverage: (c) => {
4543
+ if (c.score >= 10) return [];
4544
+ const impact = impactFromScore(c.score);
4545
+ const effort = effortForCriterion("schema_coverage", c.score);
4546
+ return [{
4547
+ id: "fix-schema-coverage",
4548
+ criterion: c.criterion_label,
4549
+ criterionId: c.criterion,
4550
+ title: "Extend schema to inner pages",
4551
+ description: "Add page-specific structured data beyond the homepage to articles, services, and product pages.",
4552
+ impact,
4553
+ effort,
4554
+ impactScore: 0,
4555
+ category: "trust",
4556
+ steps: [
4557
+ "Add Article schema to blog/news pages",
4558
+ "Add Service/Product schema to service/product pages",
4559
+ "Add BreadcrumbList schema to all inner pages",
4560
+ "Validate each page type with Rich Results Test"
4561
+ ],
4562
+ successCriteria: "At least 50% of content pages have page-specific schema",
4563
+ dependsOn: ["fix-schema-markup"]
4564
+ }];
4565
+ },
4566
+ speakable_schema: (c) => {
4567
+ if (c.score >= 10) return [];
4568
+ const impact = impactFromScore(c.score);
4569
+ const effort = effortForCriterion("speakable_schema", c.score);
4570
+ return [{
4571
+ id: "fix-speakable-schema",
4572
+ criterion: c.criterion_label,
4573
+ criterionId: c.criterion,
4574
+ title: "Add SpeakableSpecification schema",
4575
+ description: "Add Speakable schema to tell voice assistants which content sections are best for spoken answers.",
4576
+ impact,
4577
+ effort,
4578
+ impactScore: 0,
4579
+ category: "trust",
4580
+ steps: [
4581
+ "Identify key paragraphs suitable for voice readout",
4582
+ "Add SpeakableSpecification with CSS selectors to Article schema",
4583
+ "Point speakable selectors to headline and summary paragraphs",
4584
+ "Test with Google structured data testing tool"
4585
+ ],
4586
+ codeExample: `"speakable": {
4587
+ "@type": "SpeakableSpecification",
4588
+ "cssSelector": [
4589
+ ".article-headline",
4590
+ ".article-summary"
4591
+ ]
4592
+ }`,
4593
+ successCriteria: "Article pages include SpeakableSpecification in schema",
4594
+ dependsOn: ["fix-schema-markup"]
4595
+ }];
4596
+ },
4597
+ query_answer_alignment: (c, pages) => {
4598
+ if (c.score >= 10) return [];
4599
+ const impact = impactFromScore(c.score);
4600
+ const effort = effortForCriterion("query_answer_alignment", c.score);
4601
+ const affected = getAffectedPages("query_answer_alignment", pages);
4602
+ return [{
4603
+ id: "fix-query-answer-alignment",
4604
+ criterion: c.criterion_label,
4605
+ criterionId: c.criterion,
4606
+ title: "Improve query-answer alignment",
4607
+ description: "Ensure question headings are followed by direct, concise answers in the first paragraph.",
4608
+ impact,
4609
+ effort,
4610
+ impactScore: 0,
4611
+ category: "content",
4612
+ steps: [
4613
+ "Audit question-format headings and their following paragraphs",
4614
+ "Add direct answers in the first 1-2 sentences after each question heading",
4615
+ "Remove filler text between question and answer",
4616
+ "Ensure answers are self-contained and citable"
4617
+ ],
4618
+ successCriteria: "Question headings have direct answer paragraphs within 50 words",
4619
+ affectedPages: affected,
4620
+ pageCount: affected?.length
4621
+ }];
4622
+ },
4623
+ content_cannibalization: (c, pages, linkGraph) => {
4624
+ if (c.score >= 10) return [];
4625
+ const impact = impactFromScore(c.score);
4626
+ const effort = effortForCriterion("content_cannibalization", c.score);
4627
+ const fixes = [];
4628
+ if (linkGraph && linkGraph.clusters.length > 0) {
4629
+ const lowCohesion = linkGraph.clusters.filter((cl) => cl.cohesion < 50);
4630
+ if (lowCohesion.length > 0) {
4631
+ const affected = lowCohesion.flatMap((cl) => [cl.pillarUrl, ...cl.spokes]);
4632
+ fixes.push({
4633
+ id: "fix-content-cannibalization-overlap",
4634
+ criterion: c.criterion_label,
4635
+ criterionId: c.criterion,
4636
+ title: "Consolidate overlapping content",
4637
+ description: `${lowCohesion.length} content clusters have low cohesion, suggesting pages compete for the same topics.`,
4638
+ impact,
4639
+ effort,
4640
+ impactScore: 0,
4641
+ category: "content",
4642
+ steps: [
4643
+ "Identify pages targeting the same keywords or topics",
4644
+ "Merge overlapping pages into single authoritative pages",
4645
+ "Set up 301 redirects from merged pages to consolidated page",
4646
+ "Differentiate remaining similar pages with distinct angles"
4647
+ ],
4648
+ successCriteria: "No two pages target the same primary keyword or topic",
4649
+ affectedPages: affected.slice(0, 20),
4650
+ pageCount: affected.length
4651
+ });
4652
+ }
4653
+ }
4654
+ if (fixes.length === 0) {
4655
+ const affected = getAffectedPages("content_cannibalization", pages);
4656
+ fixes.push({
4657
+ id: "fix-content-cannibalization",
4658
+ criterion: c.criterion_label,
4659
+ criterionId: c.criterion,
4660
+ title: "Resolve content cannibalization",
4661
+ description: "Multiple pages may be targeting the same topics, diluting AI engine citations.",
4662
+ impact,
4663
+ effort,
4664
+ impactScore: 0,
4665
+ category: "content",
4666
+ steps: [
4667
+ "Audit pages for overlapping topic coverage",
4668
+ "Consolidate similar pages into comprehensive single pages",
4669
+ "Differentiate remaining pages with distinct angles and keywords",
4670
+ "Add canonical tags to prevent duplicate content issues"
4671
+ ],
4672
+ successCriteria: "Each topic is covered by a single authoritative page",
4673
+ affectedPages: affected,
4674
+ pageCount: affected?.length
4675
+ });
4676
+ }
4677
+ return fixes;
4678
+ },
4679
/**
 * Fix generator for the "visible_date_signal" criterion.
 *
 * Produces a single "add visible dates" fix unless the criterion already
 * scores 10 or more, in which case nothing is returned.
 *
 * @param {object} c - Criterion result; reads `score`, `criterion` and `criterion_label`.
 * @param {Array} pages - Reviewed pages, forwarded to `getAffectedPages`.
 * @returns {Array<object>} Zero or one fix descriptor.
 */
visible_date_signal: (c, pages) => {
  const criterionKey = "visible_date_signal";
  if (c.score >= 10) {
    return [];
  }
  const severity = impactFromScore(c.score);
  const workload = effortForCriterion(criterionKey, c.score);
  const urls = getAffectedPages(criterionKey, pages);
  const dateFix = {
    id: "fix-visible-dates",
    criterion: c.criterion_label,
    criterionId: c.criterion,
    title: "Add visible date signals",
    description: "Add visible publication and modification dates using <time> elements for AI engine freshness assessment.",
    impact: severity,
    effort: workload,
    // Placeholder value; this generator always emits 0 here.
    impactScore: 0,
    category: "content",
    steps: [
      'Add visible "Published" and "Last updated" dates to content pages',
      "Use <time> elements with datetime attributes",
      "Ensure dates match dateModified in schema markup",
      "Update dates when content is refreshed"
    ],
    codeExample: `<time datetime="2024-01-15">January 15, 2024</time>`,
    successCriteria: "Content pages show visible dates with <time> elements",
    affectedPages: urls,
    pageCount: urls?.length
  };
  return [dateFix];
}
4706
+ };
4707
/**
 * Estimate the overall-score gain (in points on the 0-100 scale) from applying
 * a fix of the given effort to a criterion.
 *
 * The effort level decides which score the criterion is assumed to reach:
 * 8 for "trivial"/"low", 7 for "medium", 6 for "high". An unrecognised effort
 * contributes 0 instead of propagating NaN into the totals.
 *
 * @param {string} effort - Effort label of the fix.
 * @param {number} currentScore - Current score of the criterion.
 * @param {number} weight - Weight of the criterion.
 * @param {number} totalWeight - Sum of all criterion weights.
 * @returns {number} Non-negative estimated gain.
 */
function estimateFixScoreDelta(effort, currentScore, weight, totalWeight) {
  let targetScore;
  switch (effort) {
    case "trivial":
    case "low":
      targetScore = 8;
      break;
    case "medium":
      targetScore = 7;
      break;
    case "high":
      targetScore = 6;
      break;
    default:
      return 0;
  }
  const improvement = Math.max(0, targetScore - currentScore);
  return improvement * weight / totalWeight * 100;
}

/**
 * Build a phased remediation plan from audit criteria.
 *
 * Runs every matching generator in FIX_GENERATORS, assigns each fix an
 * `impactScore`, groups the fixes into the phases described by PHASE_CONFIG,
 * pushes a fix into a later phase when one of its `dependsOn` fixes lives
 * there, and estimates the projected overall score plus a per-phase impact.
 *
 * @param {string} domain - Domain the plan is for; copied into the result.
 * @param {number} overallScore - Current overall score.
 * @param {Array<object>} criteria - Criterion results; reads `criterion` and `score`.
 * @param {Array} pagesReviewed - Reviewed pages, forwarded to the generators.
 * @param {object} [linkGraph] - Optional link graph, forwarded to the generators.
 * @returns {object} Plan with `domain`, `generatedAt`, `overallScore`,
 *   `projectedScore`, `totalFixes`, `phases`, `quickWins` and `summary`.
 */
function generateFixPlan(domain, overallScore, criteria, pagesReviewed, linkGraph) {
  const allFixes = [];
  for (const criterion of criteria) {
    const generator = FIX_GENERATORS[criterion.criterion];
    if (!generator) continue;
    const weight = CRITERION_WEIGHTS2[criterion.criterion] ?? 0.05;
    for (const fix of generator(criterion, pagesReviewed, linkGraph)) {
      // Generators emit impactScore 0; the real value is derived here.
      fix.impactScore = Math.round((10 - criterion.score) * weight * 100);
      allFixes.push(fix);
    }
  }

  const byImpactDesc = (a, b) => b.impactScore - a.impactScore;

  const phases = PHASE_CONFIG.map((config) => ({
    phase: config.phase,
    title: config.title,
    description: config.description,
    fixes: allFixes.filter((fix) => config.criteria.includes(fix.criterionId)).sort(byImpactDesc),
    // calculated after projected score
    estimatedImpact: 0
  }));

  // A fix cannot be scheduled before something it depends on: move it into
  // the (later) phase that holds the dependency.
  for (const phase of phases) {
    // phase.fixes is reassigned (not mutated) below, so this iteration keeps
    // walking the list as it was when the loop started.
    for (const fix of phase.fixes) {
      if (!fix.dependsOn) continue;
      for (const depId of fix.dependsOn) {
        const depPhase = phases.find((p) => p.fixes.some((f) => f.id === depId));
        if (depPhase && depPhase.phase > phase.phase) {
          phase.fixes = phase.fixes.filter((f) => f.id !== fix.id);
          depPhase.fixes.push(fix);
          depPhase.fixes.sort(byImpactDesc);
          break;
        }
      }
    }
  }

  const totalWeight = Object.values(CRITERION_WEIGHTS2).reduce((s, w) => s + w, 0);

  // First occurrence wins, matching a linear search through `criteria`.
  const criterionById = /* @__PURE__ */ new Map();
  for (const c of criteria) {
    if (!criterionById.has(c.criterion)) criterionById.set(c.criterion, c);
  }

  const deltaForFix = (fix) => {
    const criterion = criterionById.get(fix.criterionId);
    if (!criterion) return null;
    const weight = CRITERION_WEIGHTS2[fix.criterionId] ?? 0.05;
    return estimateFixScoreDelta(fix.effort, criterion.score, weight, totalWeight);
  };

  // Several fixes may target one criterion; only the best one counts towards
  // the projected score so the same gain is not added twice.
  const bestDeltaPerCriterion = /* @__PURE__ */ new Map();
  for (const fix of allFixes) {
    const delta = deltaForFix(fix);
    if (delta === null) continue;
    const existing = bestDeltaPerCriterion.get(fix.criterionId) ?? 0;
    if (delta > existing) bestDeltaPerCriterion.set(fix.criterionId, delta);
  }
  const scoreDelta = Array.from(bestDeltaPerCriterion.values()).reduce((s, d) => s + d, 0);
  const projectedScore = Math.min(100, Math.round(overallScore + scoreDelta));

  for (const phase of phases) {
    let phaseImpact = 0;
    const seenCriteria = /* @__PURE__ */ new Set();
    for (const fix of phase.fixes) {
      // Each criterion contributes once per phase, via its first listed fix.
      if (seenCriteria.has(fix.criterionId)) continue;
      seenCriteria.add(fix.criterionId);
      const delta = deltaForFix(fix);
      if (delta === null) continue;
      phaseImpact += delta;
    }
    phase.estimatedImpact = Math.round(phaseImpact);
  }

  const quickWins = allFixes.filter(
    (f) => (f.effort === "trivial" || f.effort === "low") && (f.impact === "critical" || f.impact === "high")
  );
  const countByImpact = (level) => allFixes.filter((f) => f.impact === level).length;

  // Pick the highest-impact fix without reordering allFixes; on ties the
  // earliest fix wins, as a stable descending sort would have chosen.
  const topFix = allFixes.reduce(
    (best, f) => best === null || f.impactScore > best.impactScore ? f : best,
    null
  );

  const summary = {
    criticalCount: countByImpact("critical"),
    highCount: countByImpact("high"),
    mediumCount: countByImpact("medium"),
    lowCount: countByImpact("low"),
    quickWinCount: quickWins.length,
    topOpportunity: topFix !== null ? topFix.title : "None",
    estimatedTotalEffort: formatEffort(allFixes.reduce((s, f) => s + effortToHours(f.effort), 0))
  };

  return {
    domain,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    overallScore,
    projectedScore,
    totalFixes: allFixes.length,
    phases,
    quickWins,
    summary
  };
}
4820
/**
 * Render an hour estimate as a short label.
 *
 * @param {number} hours - Estimated effort in hours.
 * @returns {string} "<1h" when the estimate is below one hour, otherwise the
 *   estimate rounded to the nearest whole hour in the form "~Nh".
 */
function formatEffort(hours) {
  const isUnderAnHour = hours < 1;
  return isUnderAnHour ? "<1h" : `~${Math.round(hours)}h`;
}
4824
+
3159
4825
  // src/html-report.ts
3160
4826
  function scoreColor(score) {
3161
4827
  if (score <= 40) return "#F44336";
@@ -3265,17 +4931,22 @@ function generateHtmlReport(result) {
3265
4931
  <td>${escapeHtml(opp.description)}</td>
3266
4932
  </tr>`;
3267
4933
  }).join("\n");
3268
- const pagesRows = (result.pagesReviewed || []).map((page) => {
4934
+ const pagesReviewed = result.pagesReviewed || [];
4935
+ const pagesRows = pagesReviewed.map((page) => {
3269
4936
  const issueCount = page.issues.length;
3270
4937
  const strengthCount = page.strengths.length;
4938
+ const aeoDisplay = page.aeoScore != null ? `<span style="font-weight:600;color:${scoreColor(page.aeoScore)}">${page.aeoScore}</span>` : "-";
3271
4939
  return `<tr>
3272
4940
  <td>${escapeHtml(page.url)}</td>
3273
4941
  <td>${escapeHtml(page.category)}</td>
3274
4942
  <td>${page.wordCount}</td>
4943
+ <td>${aeoDisplay}</td>
3275
4944
  <td>${issueCount}</td>
3276
4945
  <td>${strengthCount}</td>
3277
4946
  </tr>`;
3278
4947
  }).join("\n");
4948
+ const scoredPages = pagesReviewed.filter((p) => p.aeoScore != null);
4949
+ const avgPageScore = scoredPages.length > 0 ? Math.round(scoredPages.reduce((sum, p) => sum + p.aeoScore, 0) / scoredPages.length) : null;
3279
4950
  const now = (/* @__PURE__ */ new Date()).toISOString();
3280
4951
  return `<!DOCTYPE html>
3281
4952
  <html lang="en">
@@ -3311,10 +4982,11 @@ function generateHtmlReport(result) {
3311
4982
  </table>
3312
4983
  ` : ""}
3313
4984
 
3314
- ${(result.pagesReviewed || []).length > 0 ? `
3315
- <h2 class="section-title">Pages Reviewed (${(result.pagesReviewed || []).length})</h2>
4985
+ ${pagesReviewed.length > 0 ? `
4986
+ <h2 class="section-title">Pages Reviewed (${pagesReviewed.length})</h2>
4987
+ ${avgPageScore != null ? `<div class="summary-box"><div class="summary-stat"><div class="num" style="color:${scoreColor(avgPageScore)}">${avgPageScore}</div><div class="label">Avg Page AEO Score</div></div></div>` : ""}
3316
4988
  <table>
3317
- <thead><tr><th>URL</th><th>Category</th><th>Words</th><th>Issues</th><th>Strengths</th></tr></thead>
4989
+ <thead><tr><th>URL</th><th>Category</th><th>Words</th><th>AEO Score</th><th>Issues</th><th>Strengths</th></tr></thead>
3318
4990
  <tbody>${pagesRows}</tbody>
3319
4991
  </table>
3320
4992
  ` : ""}
@@ -3486,21 +5158,28 @@ async function compare(domainA, domainB, options) {
3486
5158
  audit,
3487
5159
  auditSiteFromData,
3488
5160
  buildDetailedFindings,
5161
+ buildLinkGraph,
3489
5162
  buildScorecard,
5163
+ calculateDepths,
3490
5164
  calculateOverallScore,
3491
5165
  classifyRendering,
3492
5166
  compare,
3493
5167
  crawlFullSite,
5168
+ detectClusters,
5169
+ detectHubs,
3494
5170
  detectParkedDomain,
5171
+ detectPillars,
3495
5172
  extractAllUrlsFromSitemap,
3496
5173
  extractContentPagesFromSitemap,
3497
5174
  extractInternalLinks,
5175
+ extractLinksWithAnchors,
3498
5176
  extractNavLinks,
3499
5177
  extractRawDataSummary,
3500
5178
  fetchMultiPageData,
3501
5179
  fetchWithHeadless,
3502
5180
  generateBottomLine,
3503
5181
  generateComparisonHtmlReport,
5182
+ generateFixPlan,
3504
5183
  generateHtmlReport,
3505
5184
  generateOpportunities,
3506
5185
  generatePitchNumbers,
@@ -3508,6 +5187,9 @@ async function compare(domainA, domainB, options) {
3508
5187
  inferCategory,
3509
5188
  isSpaShell,
3510
5189
  prefetchSiteData,
3511
- scoreToStatus
5190
+ scoreAllPages,
5191
+ scorePage,
5192
+ scoreToStatus,
5193
+ serializeLinkGraph
3512
5194
  });
3513
5195
  //# sourceMappingURL=index.cjs.map