aeorank 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -23
- package/dist/browser.d.ts +3 -3
- package/dist/browser.js +450 -24
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +386 -20
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +450 -24
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +450 -24
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -2942,6 +2942,234 @@ function checkImageContextAI(data) {
|
|
|
2942
2942
|
}
|
|
2943
2943
|
return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
|
|
2944
2944
|
}
|
|
2945
|
+
var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;

/**
 * Decide whether a paragraph looks like site boilerplate instead of content.
 *
 * A paragraph is treated as boilerplate when either:
 *  - it has fewer than 20 whitespace-separated tokens and contains one of the
 *    call-to-action / legal phrases in BOILERPLATE_RE, or
 *  - it has fewer than 30 tokens and mentions cookie / gdpr / consent /
 *    "opt" + any single character + "out".
 *
 * @param {string} text Paragraph text.
 * @returns {boolean} True when the paragraph should be ignored.
 */
function isBoilerplateParagraph(text) {
  const tokenCount = text.split(/\s+/).length;
  const isShortCallToAction = tokenCount < 20 && BOILERPLATE_RE.test(text);
  if (isShortCallToAction) {
    return true;
  }
  return tokenCount < 30 && /\b(cookie|gdpr|consent|opt.out)\b/i.test(text);
}
|
|
2952
|
+
/**
 * Build the set of word n-grams ("shingles") for a piece of text.
 *
 * The text is split on whitespace and tokens of length 0 or 1 are discarded
 * before the sliding window is applied. Text with fewer than `n` usable
 * tokens yields an empty set.
 *
 * @param {string} text Source text.
 * @param {number} [n=4] Number of words per shingle.
 * @returns {Set<string>} Unique shingles, each joined with single spaces.
 */
function toShingles(text, n = 4) {
  const tokens = text.split(/\s+/).filter((token) => token.length > 1);
  const result = new Set();
  let start = 0;
  while (start <= tokens.length - n) {
    result.add(tokens.slice(start, start + n).join(" "));
    start += 1;
  }
  return result;
}
|
|
2960
|
+
/**
 * Jaccard similarity between two shingle sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} a First shingle set.
 * @param {Set<string>} b Second shingle set.
 * @returns {number} Value between 0 and 1; 0 when both sets are empty.
 */
function shingleSimilarity(a, b) {
  const bothEmpty = a.size === 0 && b.size === 0;
  if (bothEmpty) {
    return 0;
  }
  const shared = [...a].filter((shingle) => b.has(shingle)).length;
  const unionSize = a.size + b.size - shared;
  if (unionSize === 0) {
    return 0;
  }
  return shared / unionSize;
}
|
|
2969
|
+
/**
 * Pull the content paragraphs out of a page's HTML.
 *
 * script/style/nav/header/footer/noscript/aside elements are removed first.
 * Every remaining <p> element is reduced to lower-cased plain text (tags and
 * named entities such as `&amp;` become spaces, whitespace is collapsed).
 * Paragraphs with fewer than three shingles, or that isBoilerplateParagraph
 * flags, are dropped.
 *
 * @param {string} html Raw page HTML.
 * @returns {Array<{text: string, shingles: Set<string>}>} Paragraphs in document order.
 */
function extractPageParagraphs(html) {
  const withoutChrome = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "");
  const bodyOnly = withoutChrome.replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  const rawParagraphs = bodyOnly.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi);
  if (rawParagraphs === null) {
    return [];
  }
  const kept = [];
  for (const raw of rawParagraphs) {
    const text = raw
      .replace(/<[^>]*>/g, " ")
      .replace(/&\w+;/g, " ")
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    const shingles = toShingles(text);
    // Too short to compare meaningfully, or recognisable boilerplate.
    if (shingles.size < 3 || isBoilerplateParagraph(text)) {
      continue;
    }
    kept.push({ text, shingles });
  }
  return kept;
}
|
|
2977
|
+
/**
 * Split a page into heading-delimited sections and collect each section's
 * content paragraphs.
 *
 * script/style/nav/header/footer/noscript/aside elements are stripped, then
 * the HTML is cut in front of every <h2>/<h3>. Content before the first such
 * heading is labelled "(intro)". Paragraph text is lower-cased plain text;
 * paragraphs with fewer than three shingles or flagged by
 * isBoilerplateParagraph are ignored, and sections left without paragraphs
 * are omitted.
 *
 * @param {string} html Raw page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function splitIntoSectionsWithParagraphs(html) {
  const stripped = html
    .replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "")
    .replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");

  const toParagraph = (rawParagraph) => {
    const text = rawParagraph
      .replace(/<[^>]*>/g, " ")
      .replace(/&\w+;/g, " ")
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    return { text, shingles: toShingles(text) };
  };
  const isContent = (paragraph) => paragraph.shingles.size >= 3 && !isBoilerplateParagraph(paragraph.text);

  return stripped
    .split(/(?=<h[23]\b[^>]*>)/i)
    .map((chunk) => {
      const headingMatch = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
      let heading = "(intro)";
      if (headingMatch) {
        heading = headingMatch[1].replace(/<[^>]*>/g, "").trim();
      }
      const rawParagraphs = chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
      const paragraphs = rawParagraphs.map(toParagraph).filter(isContent);
      return { heading, paragraphs };
    })
    .filter((section) => section.paragraphs.length > 0);
}
|
|
2993
|
+
/**
 * Find pairs of sections on one page that share near-duplicate paragraphs.
 *
 * Every unordered pair of sections is examined; the first paragraph pair
 * whose shingle similarity exceeds 0.4 records one entry for that section
 * pair, and the search moves on to the next section pair.
 *
 * @param {string} html Raw page HTML.
 * @returns {Array<{headingA: string, headingB: string, similarity: number, sample: string}>}
 *   `similarity` is a rounded percentage; `sample` is the first 80 characters
 *   of the matching paragraph from the earlier section.
 */
function findIntraPageDuplicates(html) {
  const sections = splitIntoSectionsWithParagraphs(html);
  const duplicates = [];
  if (sections.length < 2) {
    return duplicates;
  }

  // First paragraph pair (earlier-section order, then later-section order) above the threshold.
  const firstMatch = (earlier, later) => {
    for (const candidate of earlier.paragraphs) {
      for (const other of later.paragraphs) {
        const similarity = shingleSimilarity(candidate.shingles, other.shingles);
        if (similarity > 0.4) {
          return { similarity, text: candidate.text };
        }
      }
    }
    return null;
  };

  sections.forEach((earlier, index) => {
    for (const later of sections.slice(index + 1)) {
      const hit = firstMatch(earlier, later);
      if (hit === null) {
        continue;
      }
      duplicates.push({
        headingA: earlier.heading,
        headingB: later.heading,
        similarity: Math.round(hit.similarity * 100),
        sample: hit.text.slice(0, 80)
      });
    }
  });
  return duplicates;
}
|
|
3020
|
+
/**
 * Site-level criterion "duplicate_content": looks for near-duplicate text
 * blocks between sections of the same page.
 *
 * The homepage (if present) and every page of `data.blogSample` are run
 * through findIntraPageDuplicates. The score is driven by the share of pages
 * that contain at least one duplicate section pair:
 *   no pairs -> 10, <=5% of pages and <=2 pairs -> 9, <=10% -> 7,
 *   <=20% -> 5, <=40% -> 3, otherwise 0.
 * Up to three pages, with up to two section pairs each, are appended as
 * example findings.
 *
 * @param {object} data Crawl data; reads `homepage`, `blogSample` and `domain`.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkDuplicateContent(data) {
  const CRITERION = "duplicate_content";
  const LABEL = "Duplicate Content Blocks";

  const pages = [];
  if (data.homepage) {
    const { text, finalUrl } = data.homepage;
    pages.push({ html: text, url: finalUrl || `https://${data.domain}/` });
  }
  for (const { text, finalUrl } of data.blogSample || []) {
    pages.push({ html: text, url: finalUrl || "" });
  }

  if (pages.length === 0) {
    return {
      criterion: CRITERION,
      criterion_label: LABEL,
      score: 0,
      status: "fail",
      findings: [{ severity: "critical", detail: "No pages available for duplicate content analysis" }],
      fix_priority: "P1"
    };
  }

  // Pages that have at least one duplicate section pair, plus the overall pair count.
  const flagged = [];
  let pairTotal = 0;
  pages.forEach(({ html, url }) => {
    const pairs = findIntraPageDuplicates(html);
    if (pairs.length === 0) {
      return;
    }
    pairTotal += pairs.length;
    flagged.push({ url, pairs });
  });

  const flaggedCount = flagged.length;
  const ratio = flaggedCount / pages.length;
  const percent = Math.round(ratio * 100);

  // Returns [score, headline finding] for the first tier the page ratio falls into.
  const summarize = () => {
    if (pairTotal === 0) {
      return [10, { severity: "info", detail: `${pages.length} pages analyzed - no duplicate content blocks detected` }];
    }
    if (ratio <= 0.05 && pairTotal <= 2) {
      return [9, { severity: "info", detail: `${pairTotal} duplicate block pair(s) on ${flaggedCount} page(s) - minor` }];
    }
    if (ratio <= 0.1) {
      return [7, { severity: "low", detail: `${pairTotal} duplicate block pair(s) across ${flaggedCount} page(s)`, fix: "Rewrite duplicate sections to provide unique content in each" }];
    }
    if (ratio <= 0.2) {
      return [5, { severity: "medium", detail: `${flaggedCount} pages (${percent}%) contain duplicate content blocks`, fix: "Rewrite or remove repeated text blocks - LLMs may flag this as low-quality content" }];
    }
    if (ratio <= 0.4) {
      return [3, { severity: "medium", detail: `${flaggedCount} pages (${percent}%) have significant duplicate content`, fix: "Widespread duplicate blocks reduce content authority - rewrite each section with unique angles" }];
    }
    return [0, { severity: "high", detail: `${flaggedCount} pages (${percent}%) contain duplicate content blocks`, fix: "Severe content duplication across the site - LLMs will likely reduce citation authority" }];
  };
  const [score, headline] = summarize();

  const examples = flagged.slice(0, 3).flatMap(({ url, pairs }) => {
    const shortUrl = url.slice(0, 60);
    return pairs.slice(0, 2).map((pair) => ({
      severity: "low",
      detail: `${shortUrl}: '${pair.headingA}' and '${pair.headingB}' share ${pair.similarity}% similar text ("${pair.sample}...")`,
      fix: `Rewrite one of these sections to eliminate duplicate content`
    }));
  });

  const passed = score >= 7;
  let status = "fail";
  if (passed) {
    status = "pass";
  } else if (score >= 4) {
    status = "partial";
  }
  return {
    criterion: CRITERION,
    criterion_label: LABEL,
    score,
    status,
    findings: [headline, ...examples],
    fix_priority: passed ? "P3" : "P1"
  };
}
|
|
3079
|
+
/**
 * Site-level criterion "cross_page_duplication": detects paragraphs that are
 * repeated on more than one page.
 *
 * Steps:
 *  1. Extract content paragraphs for the homepage and each `data.blogSample`
 *     page via extractPageParagraphs.
 *  2. Fingerprint every paragraph (its first five shingles). Fingerprints that
 *     occur on at least max(3, 40% of pages) pages are treated as site-wide
 *     boilerplate and are not counted as duplication.
 *  3. For each pair of pages, count the paragraphs of the earlier page that
 *     have a paragraph with shingle similarity > 0.4 on the later page. A
 *     page pair is reported when it shares two or more paragraphs.
 *  4. Score by the share of pages involved in at least one reported pair:
 *     none -> 10, <=5% and <=4 shared paragraphs -> 9, <=10% -> 7,
 *     <=20% -> 5, <=40% -> 3, otherwise 0.
 *
 * Affected pages are counted by page index. Counting by the display URL
 * (truncated to 60 characters, or "" when a page has no finalUrl) merged
 * distinct pages into one and understated the affected ratio.
 *
 * @param {object} data Crawl data; reads `homepage`, `blogSample` and `domain`.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkCrossPageDuplication(data) {
  const findings = [];

  // Cheap paragraph identity used only for site-wide boilerplate detection.
  const fingerprintOf = (paragraph) => [...paragraph.shingles].slice(0, 5).join("|");
  const buildPage = (url, html) => {
    const paragraphs = extractPageParagraphs(html);
    // Computed once per paragraph instead of once per page-pair comparison.
    return { url, paragraphs, fingerprints: paragraphs.map(fingerprintOf) };
  };

  const pages = [];
  if (data.homepage) {
    pages.push(buildPage(data.homepage.finalUrl || `https://${data.domain}/`, data.homepage.text));
  }
  if (data.blogSample) {
    for (const page of data.blogSample) {
      pages.push(buildPage(page.finalUrl || "", page.text));
    }
  }
  if (pages.length <= 1) {
    findings.push({ severity: "info", detail: "Not enough pages to assess cross-page duplication" });
    return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score: 5, status: "partial", findings, fix_priority: "P3" };
  }

  // Number of pages on which each fingerprint occurs (counted once per page).
  const paragraphPageCount = new Map();
  for (const page of pages) {
    for (const fp of new Set(page.fingerprints)) {
      paragraphPageCount.set(fp, (paragraphPageCount.get(fp) || 0) + 1);
    }
  }
  const boilerplateThreshold = Math.max(3, pages.length * 0.4);
  const siteBoilerprints = new Set();
  for (const [fp, count] of paragraphPageCount) {
    if (count >= boilerplateThreshold) siteBoilerprints.add(fp);
  }

  const crossDupPairs = [];
  const affectedPageIndexes = new Set();
  for (let i = 0; i < pages.length; i++) {
    for (let j = i + 1; j < pages.length; j++) {
      let dupCount = 0;
      let sample = "";
      for (let k = 0; k < pages[i].paragraphs.length; k++) {
        if (siteBoilerprints.has(pages[i].fingerprints[k])) continue;
        const pA = pages[i].paragraphs[k];
        for (const pB of pages[j].paragraphs) {
          if (shingleSimilarity(pA.shingles, pB.shingles) > 0.4) {
            dupCount++;
            if (!sample) sample = pA.text.slice(0, 80);
            break;
          }
        }
      }
      // A single shared paragraph is tolerated; two or more marks the pair.
      if (dupCount >= 2) {
        crossDupPairs.push({
          urlA: pages[i].url.slice(0, 60),
          urlB: pages[j].url.slice(0, 60),
          dupCount,
          sample
        });
        affectedPageIndexes.add(i);
        affectedPageIndexes.add(j);
      }
    }
  }

  const affectedCount = affectedPageIndexes.size;
  const affectedRatio = affectedCount / pages.length;
  const totalDupParagraphs = crossDupPairs.reduce((sum, pair) => sum + pair.dupCount, 0);
  let score;
  if (crossDupPairs.length === 0) {
    score = 10;
    findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no cross-page content duplication detected` });
  } else if (affectedRatio <= 0.05 && totalDupParagraphs <= 4) {
    score = 9;
    findings.push({ severity: "info", detail: `${totalDupParagraphs} shared paragraph(s) across ${affectedCount} page(s) - minor` });
  } else if (affectedRatio <= 0.1) {
    score = 7;
    findings.push({ severity: "low", detail: `${totalDupParagraphs} shared paragraphs across ${affectedCount} pages`, fix: "Rewrite shared content so each page provides a unique perspective" });
  } else if (affectedRatio <= 0.2) {
    score = 5;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate paragraphs`, fix: "Significant cross-page duplication - AI engines may only index one version" });
  } else if (affectedRatio <= 0.4) {
    score = 3;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) contain shared content blocks`, fix: "Widespread copy-paste content across pages reduces overall site authority" });
  } else {
    score = 0;
    findings.push({ severity: "high", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate content`, fix: "Severe cross-page duplication - AI engines will likely ignore redundant pages entirely" });
  }
  for (const pair of crossDupPairs.slice(0, 3)) {
    findings.push({
      severity: "low",
      detail: `${pair.dupCount} shared paragraph(s): ${pair.urlA} \u2194 ${pair.urlB} ("${pair.sample}...")`,
      fix: "Rewrite shared paragraphs so each page has unique content"
    });
  }
  return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
|
|
2945
3173
|
function auditSiteFromData(data) {
|
|
2946
3174
|
const topicCoherence = checkTopicCoherence(data);
|
|
2947
3175
|
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
@@ -2980,7 +3208,10 @@ function auditSiteFromData(data) {
|
|
|
2980
3208
|
checkEvidencePackaging(data),
|
|
2981
3209
|
checkEntityDisambiguation(data),
|
|
2982
3210
|
checkExtractionFriction(data),
|
|
2983
|
-
checkImageContextAI(data)
|
|
3211
|
+
checkImageContextAI(data),
|
|
3212
|
+
// V3 criteria (#35-#36)
|
|
3213
|
+
checkDuplicateContent(data),
|
|
3214
|
+
checkCrossPageDuplication(data)
|
|
2984
3215
|
];
|
|
2985
3216
|
}
|
|
2986
3217
|
async function auditSite(targetUrl) {
|
|
@@ -3004,11 +3235,11 @@ var WEIGHTS = {
|
|
|
3004
3235
|
// Information density per page
|
|
3005
3236
|
direct_answer_density: 0.05,
|
|
3006
3237
|
// Direct answers to queries
|
|
3007
|
-
qa_content_format: 0.
|
|
3238
|
+
qa_content_format: 0.04,
|
|
3008
3239
|
// Answer-shaped content structure
|
|
3009
|
-
query_answer_alignment: 0.
|
|
3240
|
+
query_answer_alignment: 0.04,
|
|
3010
3241
|
// Relevance to actual AI queries
|
|
3011
|
-
faq_section: 0.
|
|
3242
|
+
faq_section: 0.03,
|
|
3012
3243
|
// Structured Q&A pairs
|
|
3013
3244
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
3014
3245
|
// HOW easily AI engines can extract and trust your content.
|
|
@@ -3056,8 +3287,13 @@ var WEIGHTS = {
|
|
|
3056
3287
|
// Clear entity boundaries
|
|
3057
3288
|
extraction_friction: 0.02,
|
|
3058
3289
|
// Sentence length, voice, jargon
|
|
3059
|
-
image_context_ai: 0.01
|
|
3290
|
+
image_context_ai: 0.01,
|
|
3060
3291
|
// Figure/figcaption, alt text quality
|
|
3292
|
+
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
3293
|
+
duplicate_content: 0.05,
|
|
3294
|
+
// Duplicate text blocks within pages
|
|
3295
|
+
cross_page_duplication: 0.03
|
|
3296
|
+
// Same paragraphs copied across pages
|
|
3061
3297
|
};
|
|
3062
3298
|
function calculateOverallScore(criteria) {
|
|
3063
3299
|
let totalWeight = 0;
|
|
@@ -3187,7 +3423,9 @@ var PILLARS = {
|
|
|
3187
3423
|
"fact_density",
|
|
3188
3424
|
"citation_ready_writing",
|
|
3189
3425
|
"answer_first_placement",
|
|
3190
|
-
"evidence_packaging"
|
|
3426
|
+
"evidence_packaging",
|
|
3427
|
+
"duplicate_content",
|
|
3428
|
+
"cross_page_duplication"
|
|
3191
3429
|
],
|
|
3192
3430
|
"Content Structure": [
|
|
3193
3431
|
"direct_answer_density",
|
|
@@ -3252,6 +3490,8 @@ var CLIENT_NAMES = {
|
|
|
3252
3490
|
image_context_ai: "Image Context for AI",
|
|
3253
3491
|
schema_coverage: "Schema Coverage",
|
|
3254
3492
|
speakable_schema: "Speakable Schema",
|
|
3493
|
+
duplicate_content: "Duplicate Content Blocks",
|
|
3494
|
+
cross_page_duplication: "Cross-Page Duplicate Content",
|
|
3255
3495
|
content_cannibalization: "Content Cannibalization",
|
|
3256
3496
|
llms_txt: "llms.txt File",
|
|
3257
3497
|
robots_txt: "robots.txt for AI",
|
|
@@ -3269,10 +3509,12 @@ var PILLAR_WEIGHTS = {
|
|
|
3269
3509
|
citation_ready_writing: 0.04,
|
|
3270
3510
|
answer_first_placement: 0.03,
|
|
3271
3511
|
evidence_packaging: 0.03,
|
|
3512
|
+
duplicate_content: 0.05,
|
|
3513
|
+
cross_page_duplication: 0.03,
|
|
3272
3514
|
direct_answer_density: 0.05,
|
|
3273
|
-
qa_content_format: 0.
|
|
3274
|
-
query_answer_alignment: 0.
|
|
3275
|
-
faq_section: 0.
|
|
3515
|
+
qa_content_format: 0.04,
|
|
3516
|
+
query_answer_alignment: 0.04,
|
|
3517
|
+
faq_section: 0.03,
|
|
3276
3518
|
table_list_extractability: 0.03,
|
|
3277
3519
|
definition_patterns: 0.02,
|
|
3278
3520
|
entity_disambiguation: 0.02,
|
|
@@ -3305,6 +3547,8 @@ var CRITERION_EFFORT = {
|
|
|
3305
3547
|
citation_ready_writing: "Medium",
|
|
3306
3548
|
answer_first_placement: "Medium",
|
|
3307
3549
|
evidence_packaging: "Medium",
|
|
3550
|
+
duplicate_content: "Medium",
|
|
3551
|
+
cross_page_duplication: "Medium",
|
|
3308
3552
|
direct_answer_density: "Medium",
|
|
3309
3553
|
qa_content_format: "Medium",
|
|
3310
3554
|
query_answer_alignment: "Medium",
|
|
@@ -3360,6 +3604,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3360
3604
|
image_context_ai: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
|
|
3361
3605
|
schema_coverage: "Extend structured data to inner pages (articles, services, products).",
|
|
3362
3606
|
speakable_schema: "Add SpeakableSpecification schema for voice assistant compatibility.",
|
|
3607
|
+
duplicate_content: "Rewrite duplicate text blocks so each section provides unique value.",
|
|
3608
|
+
cross_page_duplication: "Rewrite shared paragraphs across pages so each page has unique content.",
|
|
3363
3609
|
content_cannibalization: "Consolidate overlapping pages or differentiate titles and H1 headings.",
|
|
3364
3610
|
llms_txt: "Create a /llms.txt file describing your site for AI engines.",
|
|
3365
3611
|
robots_txt: "Update robots.txt to explicitly allow AI crawlers.",
|
|
@@ -3455,7 +3701,9 @@ var CRITERION_LABELS = {
|
|
|
3455
3701
|
"Evidence Packaging": "Evidence Packaging",
|
|
3456
3702
|
"Entity Disambiguation": "Entity Disambiguation",
|
|
3457
3703
|
"Extraction Friction Score": "Extraction Friction Score",
|
|
3458
|
-
"Image Context for AI": "Image Context for AI"
|
|
3704
|
+
"Image Context for AI": "Image Context for AI",
|
|
3705
|
+
"Duplicate Content Blocks": "Duplicate Content Blocks",
|
|
3706
|
+
"Cross-Page Duplicate Content": "Cross-Page Duplicate Content"
|
|
3459
3707
|
};
|
|
3460
3708
|
function scoreToStatus(score) {
|
|
3461
3709
|
if (score === 0) return "MISSING";
|
|
@@ -3550,9 +3798,9 @@ var CRITERION_WEIGHTS = {
|
|
|
3550
3798
|
content_depth: 0.07,
|
|
3551
3799
|
fact_density: 0.06,
|
|
3552
3800
|
direct_answer_density: 0.05,
|
|
3553
|
-
qa_content_format: 0.
|
|
3554
|
-
query_answer_alignment: 0.
|
|
3555
|
-
faq_section: 0.
|
|
3801
|
+
qa_content_format: 0.04,
|
|
3802
|
+
query_answer_alignment: 0.04,
|
|
3803
|
+
faq_section: 0.03,
|
|
3556
3804
|
// Content Organization (~30%)
|
|
3557
3805
|
entity_consistency: 0.05,
|
|
3558
3806
|
internal_linking: 0.04,
|
|
@@ -3581,7 +3829,10 @@ var CRITERION_WEIGHTS = {
|
|
|
3581
3829
|
evidence_packaging: 0.03,
|
|
3582
3830
|
entity_disambiguation: 0.02,
|
|
3583
3831
|
extraction_friction: 0.02,
|
|
3584
|
-
image_context_ai: 0.01
|
|
3832
|
+
image_context_ai: 0.01,
|
|
3833
|
+
// V3 Criteria
|
|
3834
|
+
duplicate_content: 0.05,
|
|
3835
|
+
cross_page_duplication: 0.03
|
|
3585
3836
|
};
|
|
3586
3837
|
var OPPORTUNITY_TEMPLATES = {
|
|
3587
3838
|
llms_txt: {
|
|
@@ -3704,6 +3955,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3704
3955
|
effort: "Medium",
|
|
3705
3956
|
description: "Ensure every question-format heading (H2/H3) is followed by a direct answer paragraph. This pattern is ideal for AI engine snippet extraction."
|
|
3706
3957
|
},
|
|
3958
|
+
duplicate_content: {
|
|
3959
|
+
name: "Fix Duplicate Content Blocks",
|
|
3960
|
+
effort: "Medium",
|
|
3961
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality or thin content, reducing citation authority. Rewrite duplicate blocks with unique angles."
|
|
3962
|
+
},
|
|
3963
|
+
cross_page_duplication: {
|
|
3964
|
+
name: "Eliminate Cross-Page Duplicate Content",
|
|
3965
|
+
effort: "Medium",
|
|
3966
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version and ignore the rest. Rewrite shared content so each page offers a unique perspective."
|
|
3967
|
+
},
|
|
3707
3968
|
content_cannibalization: {
|
|
3708
3969
|
name: "Resolve Content Cannibalization",
|
|
3709
3970
|
effort: "Medium",
|
|
@@ -4112,9 +4373,9 @@ var PAGE_CRITERIA = {
|
|
|
4112
4373
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
4113
4374
|
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
4114
4375
|
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
4115
|
-
qa_content_format: { weight: 0.
|
|
4116
|
-
query_answer_alignment: { weight: 0.
|
|
4117
|
-
faq_section: { weight: 0.
|
|
4376
|
+
qa_content_format: { weight: 0.04, label: "Q&A Content Format" },
|
|
4377
|
+
query_answer_alignment: { weight: 0.04, label: "Query-Answer Alignment" },
|
|
4378
|
+
faq_section: { weight: 0.03, label: "FAQ Section Content" },
|
|
4118
4379
|
// Content Organization
|
|
4119
4380
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
4120
4381
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
@@ -4131,7 +4392,8 @@ var PAGE_CRITERIA = {
|
|
|
4131
4392
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
4132
4393
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
4133
4394
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
4134
|
-
image_context_ai: { weight: 0.01, label: "Image Context for AI" }
|
|
4395
|
+
image_context_ai: { weight: 0.01, label: "Image Context for AI" },
|
|
4396
|
+
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
4135
4397
|
};
|
|
4136
4398
|
function extractJsonLdBlocks(html) {
|
|
4137
4399
|
const blocks = [];
|
|
@@ -4580,6 +4842,90 @@ function scoreImageContextAI(html) {
|
|
|
4580
4842
|
if (contextualImages.length > 0) score += 3;
|
|
4581
4843
|
return cap(score, 10);
|
|
4582
4844
|
}
|
|
4845
|
+
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;

/**
 * Tell whether a paragraph is boilerplate (call-to-action, legal or
 * cookie-consent copy) that should be excluded from duplicate detection.
 *
 * Rules, based on the whitespace-separated token count:
 *  - 30 tokens or more: never boilerplate;
 *  - under 30 tokens and mentioning cookie / gdpr / consent /
 *    "opt" + any single character + "out": boilerplate;
 *  - under 20 tokens and matching BOILERPLATE_PATTERNS: boilerplate.
 *
 * @param {string} text Paragraph text.
 * @returns {boolean}
 */
function isBoilerplate(text) {
  const wordCount = text.split(/\s+/).length;
  if (wordCount >= 30) {
    return false;
  }
  if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text)) {
    return true;
  }
  return wordCount < 20 && BOILERPLATE_PATTERNS.test(text);
}
|
|
4852
|
+
/**
 * Page-level duplicate-content score.
 *
 * Thin wrapper that discards the duplicate details and keeps only the score
 * computed by scoreDuplicateContentDetailed.
 *
 * @param {string} html Raw page HTML.
 * @returns {number} The `score` field of the detailed result.
 */
function scoreDuplicateContent(html) {
  const { score } = scoreDuplicateContentDetailed(html);
  return score;
}
|
|
4855
|
+
/**
 * Score a single page for duplicated text between its sections and report
 * the duplicates that were found.
 *
 * For every pair of sections, each paragraph of the earlier section is
 * compared with the paragraphs of the later section; the first one whose
 * Jaccard similarity exceeds 0.4 records a duplicate and ends the search for
 * that paragraph in that section pair.
 *
 * Scoring: fewer than two sections or no duplicates -> 10; exactly one
 * duplicate -> 6 if it is at most 5% of all paragraphs, else 4; two
 * duplicates -> 2; three or more -> 0.
 *
 * @param {string} html Raw page HTML.
 * @returns {{score: number, duplicates: Array<{headingA: string, headingB: string, similarity: number, sample: string}>}}
 */
function scoreDuplicateContentDetailed(html) {
  const sections = extractSectionsWithParagraphs(html);
  if (sections.length < 2) {
    return { score: 10, duplicates: [] };
  }

  let paragraphTotal = 0;
  for (const section of sections) {
    paragraphTotal += section.paragraphs.length;
  }

  // Similarity of the first candidate above the threshold, or null when none qualifies.
  const firstSimilarityAbove = (paragraph, candidates) => {
    for (const candidate of candidates) {
      const similarity = shingleJaccard(paragraph.shingles, candidate.shingles);
      if (similarity > 0.4) {
        return similarity;
      }
    }
    return null;
  };

  const duplicates = [];
  sections.forEach((earlier, index) => {
    for (const later of sections.slice(index + 1)) {
      for (const paragraph of earlier.paragraphs) {
        const similarity = firstSimilarityAbove(paragraph, later.paragraphs);
        if (similarity === null) {
          continue;
        }
        duplicates.push({
          headingA: earlier.heading,
          headingB: later.heading,
          similarity: Math.round(similarity * 100),
          sample: paragraph.text.slice(0, 80)
        });
      }
    }
  });

  const duplicateCount = duplicates.length;
  if (duplicateCount === 0) {
    return { score: 10, duplicates: [] };
  }

  const ratio = paragraphTotal > 0 ? duplicateCount / paragraphTotal : 0;
  let score = 0;
  if (duplicateCount === 1) {
    score = ratio <= 0.05 ? 6 : 4;
  } else if (duplicateCount === 2) {
    score = 2;
  }
  return { score, duplicates };
}
|
|
4894
|
+
/**
 * Break a page into <h2>/<h3>-delimited sections with their content
 * paragraphs.
 *
 * script/style/nav/header/footer/noscript/aside elements are removed, the
 * remaining HTML is split in front of each <h2>/<h3>, and anything before
 * the first heading is labelled "(intro)". Each <p> becomes lower-cased
 * plain text plus its 4-word shingles; paragraphs with fewer than three
 * shingles or flagged by isBoilerplate are skipped, and sections without any
 * remaining paragraph are left out.
 *
 * @param {string} html Raw page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function extractSectionsWithParagraphs(html) {
  const withoutChrome = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "");
  const content = withoutChrome.replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  const result = [];

  content.split(/(?=<h[23]\b[^>]*>)/i).forEach((chunk) => {
    const found = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = found === null ? "(intro)" : found[1].replace(/<[^>]*>/g, "").trim();

    const paragraphs = [];
    for (const rawParagraph of chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || []) {
      const text = rawParagraph
        .replace(/<[^>]*>/g, " ")
        .replace(/&\w+;/g, " ")
        .replace(/\s+/g, " ")
        .trim()
        .toLowerCase();
      const shingles = buildShingles(text, 4);
      if (shingles.size >= 3 && !isBoilerplate(text)) {
        paragraphs.push({ text, shingles });
      }
    }

    if (paragraphs.length > 0) {
      result.push({ heading, paragraphs });
    }
  });
  return result;
}
|
|
4912
|
+
/**
 * Produce the set of n-word shingles of a text.
 *
 * Tokens are obtained by splitting on whitespace; tokens shorter than two
 * characters are dropped before windows of `n` consecutive tokens are
 * joined with single spaces.
 *
 * @param {string} text Source text.
 * @param {number} n Words per shingle.
 * @returns {Set<string>} Unique shingles; empty when there are fewer than `n` tokens.
 */
function buildShingles(text, n) {
  const tokens = text.split(/\s+/).filter((token) => token.length > 1);
  const lastStart = tokens.length - n;
  const shingles = new Set();
  let start = 0;
  while (start <= lastStart) {
    const window = tokens.slice(start, start + n);
    shingles.add(window.join(" "));
    start += 1;
  }
  return shingles;
}
|
|
4920
|
+
/**
 * Jaccard index of two shingle sets (intersection size over union size).
 *
 * @param {Set<string>} a First shingle set.
 * @param {Set<string>} b Second shingle set.
 * @returns {number} Similarity between 0 and 1; 0 when both sets are empty.
 */
function shingleJaccard(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 0;
  }
  const common = [...a].reduce((count, shingle) => count + (b.has(shingle) ? 1 : 0), 0);
  const unionSize = a.size + b.size - common;
  return unionSize !== 0 ? common / unionSize : 0;
}
|
|
4583
4929
|
var SCORING_FUNCTIONS = {
|
|
4584
4930
|
schema_markup: scoreSchemaMarkup,
|
|
4585
4931
|
qa_content_format: scoreQAFormat,
|
|
@@ -4600,7 +4946,8 @@ var SCORING_FUNCTIONS = {
|
|
|
4600
4946
|
evidence_packaging: scoreEvidencePackaging,
|
|
4601
4947
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4602
4948
|
extraction_friction: scoreExtractionFriction,
|
|
4603
|
-
image_context_ai: scoreImageContextAI
|
|
4949
|
+
image_context_ai: scoreImageContextAI,
|
|
4950
|
+
duplicate_content: scoreDuplicateContent
|
|
4604
4951
|
};
|
|
4605
4952
|
function scorePage(html, url) {
|
|
4606
4953
|
let totalWeight = 0;
|
|
@@ -4614,6 +4961,11 @@ function scorePage(html, url) {
|
|
|
4614
4961
|
totalWeight += weight;
|
|
4615
4962
|
}
|
|
4616
4963
|
let aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
|
|
4964
|
+
const dupScore = criterionScores.find((c) => c.criterion === "duplicate_content")?.score ?? 10;
|
|
4965
|
+
if (dupScore <= 6) {
|
|
4966
|
+
const dupCap = 35 + dupScore * 5;
|
|
4967
|
+
aeoScore = Math.min(aeoScore, dupCap);
|
|
4968
|
+
}
|
|
4617
4969
|
const scoreCapped = aeoScore > 75;
|
|
4618
4970
|
if (scoreCapped) aeoScore = 75;
|
|
4619
4971
|
return { aeoScore, criterionScores, scoreCapped };
|
|
@@ -4833,6 +5185,15 @@ function checkHasCitationReadyContent(html) {
|
|
|
4833
5185
|
}
|
|
4834
5186
|
return null;
|
|
4835
5187
|
}
|
|
5188
|
+
/**
 * Page issue check: report duplicated content blocks within a page.
 *
 * Uses scoreDuplicateContentDetailed; an issue is produced only when the
 * score is 6 or lower and at least one duplicate was recorded. The label
 * describes the first duplicate (and the total count when there are
 * several). Severity is "error" for scores of 3 or lower, otherwise
 * "warning".
 *
 * @param {string} html Raw page HTML.
 * @returns {{check: string, label: string, severity: string} | null} The issue, or null when there is nothing to report.
 */
function checkDuplicateContentBlocks(html) {
  const { score, duplicates } = scoreDuplicateContentDetailed(html);
  if (score > 6 || duplicates.length === 0) {
    return null;
  }

  const [first] = duplicates;
  let label;
  if (duplicates.length === 1) {
    label = `Duplicate content: '${first.headingA}' and '${first.headingB}' share ${first.similarity}% similar text ("${first.sample}...")`;
  } else {
    label = `${duplicates.length} duplicate blocks found (e.g. '${first.headingA}' and '${first.headingB}' \u2014 "${first.sample}...")`;
  }
  const severity = score <= 3 ? "error" : "warning";
  return { check: "duplicate-content", label, severity };
}
|
|
4836
5197
|
function analyzePage(html, url, category) {
|
|
4837
5198
|
const title = extractTitle(html);
|
|
4838
5199
|
const textContent = getTextContent2(html);
|
|
@@ -4851,7 +5212,8 @@ function analyzePage(html, url, category) {
|
|
|
4851
5212
|
checkImagesMissingAlt(html),
|
|
4852
5213
|
checkNoInternalLinks(html, url),
|
|
4853
5214
|
checkNoAnswerBlock(html),
|
|
4854
|
-
checkNoEvidence(html, url)
|
|
5215
|
+
checkNoEvidence(html, url),
|
|
5216
|
+
checkDuplicateContentBlocks(html)
|
|
4855
5217
|
];
|
|
4856
5218
|
for (const result of issueChecks) {
|
|
4857
5219
|
if (result) issues.push(result);
|
|
@@ -5217,9 +5579,9 @@ var CRITERION_WEIGHTS2 = {
|
|
|
5217
5579
|
content_depth: 0.07,
|
|
5218
5580
|
fact_density: 0.06,
|
|
5219
5581
|
direct_answer_density: 0.05,
|
|
5220
|
-
qa_content_format: 0.
|
|
5221
|
-
query_answer_alignment: 0.
|
|
5222
|
-
faq_section: 0.
|
|
5582
|
+
qa_content_format: 0.04,
|
|
5583
|
+
query_answer_alignment: 0.04,
|
|
5584
|
+
faq_section: 0.03,
|
|
5223
5585
|
// Content Organization (~30%)
|
|
5224
5586
|
entity_consistency: 0.05,
|
|
5225
5587
|
internal_linking: 0.04,
|
|
@@ -5233,6 +5595,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
5233
5595
|
clean_html: 0.02,
|
|
5234
5596
|
// Technical Plumbing (~15%)
|
|
5235
5597
|
content_cannibalization: 0.02,
|
|
5598
|
+
duplicate_content: 0.05,
|
|
5599
|
+
cross_page_duplication: 0.03,
|
|
5236
5600
|
llms_txt: 0.02,
|
|
5237
5601
|
robots_txt: 0.02,
|
|
5238
5602
|
content_velocity: 0.02,
|
|
@@ -5277,7 +5641,9 @@ var PHASE_CONFIG = [
|
|
|
5277
5641
|
"citation_ready_writing",
|
|
5278
5642
|
"answer_first_placement",
|
|
5279
5643
|
"evidence_packaging",
|
|
5280
|
-
"entity_disambiguation"
|
|
5644
|
+
"entity_disambiguation",
|
|
5645
|
+
"duplicate_content",
|
|
5646
|
+
"cross_page_duplication"
|
|
5281
5647
|
]
|
|
5282
5648
|
},
|
|
5283
5649
|
{
|
|
@@ -6157,6 +6523,66 @@ Summarization: yes`,
|
|
|
6157
6523
|
}
|
|
6158
6524
|
return fixes;
|
|
6159
6525
|
},
|
|
6526
|
+
duplicate_content: (c, pages) => {
|
|
6527
|
+
if (c.score >= 10) return [];
|
|
6528
|
+
const impact = impactFromScore(c.score);
|
|
6529
|
+
const effort = effortForCriterion("duplicate_content", c.score);
|
|
6530
|
+
const affected = getAffectedPages("duplicate_content", pages);
|
|
6531
|
+
const sectionPairs = c.findings.filter((f) => f.detail.includes("' and '")).map((f) => {
|
|
6532
|
+
const match = f.detail.match(/'([^']+)' and '([^']+)'/);
|
|
6533
|
+
return match ? { a: match[1], b: match[2] } : null;
|
|
6534
|
+
}).filter(Boolean);
|
|
6535
|
+
const steps = [
|
|
6536
|
+
"Identify sections with duplicate or near-identical text",
|
|
6537
|
+
"Rewrite each section to provide a unique angle on the topic",
|
|
6538
|
+
"Ensure each heading section adds new information for the reader"
|
|
6539
|
+
];
|
|
6540
|
+
if (sectionPairs.length > 0) {
|
|
6541
|
+
const pair = sectionPairs[0];
|
|
6542
|
+
steps.unshift(`Start with '${pair.a}' and '${pair.b}' which share similar text`);
|
|
6543
|
+
}
|
|
6544
|
+
return [{
|
|
6545
|
+
id: "fix-duplicate-content",
|
|
6546
|
+
criterion: c.criterion_label,
|
|
6547
|
+
criterionId: c.criterion,
|
|
6548
|
+
title: "Fix duplicate content blocks",
|
|
6549
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality content, reducing the authority of the page.",
|
|
6550
|
+
impact,
|
|
6551
|
+
effort,
|
|
6552
|
+
impactScore: 0,
|
|
6553
|
+
category: "content",
|
|
6554
|
+
steps,
|
|
6555
|
+
successCriteria: "Each section within a page provides unique content",
|
|
6556
|
+
affectedPages: affected,
|
|
6557
|
+
pageCount: affected?.length
|
|
6558
|
+
}];
|
|
6559
|
+
},
|
|
6560
|
+
cross_page_duplication: (c, pages) => {
|
|
6561
|
+
if (c.score >= 10) return [];
|
|
6562
|
+
const impact = impactFromScore(c.score);
|
|
6563
|
+
const effort = effortForCriterion("cross_page_duplication", c.score);
|
|
6564
|
+
const affected = getAffectedPages("cross_page_duplication", pages);
|
|
6565
|
+
return [{
|
|
6566
|
+
id: "fix-cross-page-duplication",
|
|
6567
|
+
criterion: c.criterion_label,
|
|
6568
|
+
criterionId: c.criterion,
|
|
6569
|
+
title: "Eliminate cross-page duplicate content",
|
|
6570
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version, wasting the others.",
|
|
6571
|
+
impact,
|
|
6572
|
+
effort,
|
|
6573
|
+
impactScore: 0,
|
|
6574
|
+
category: "content",
|
|
6575
|
+
steps: [
|
|
6576
|
+
"Identify paragraphs that are copy-pasted across multiple pages",
|
|
6577
|
+
"Rewrite each instance to provide a unique angle relevant to that page",
|
|
6578
|
+
"Move truly shared content to a single resource page and link to it",
|
|
6579
|
+
"Use canonical tags if pages must share content"
|
|
6580
|
+
],
|
|
6581
|
+
successCriteria: "Each page has unique body content with no copy-pasted paragraphs",
|
|
6582
|
+
affectedPages: affected,
|
|
6583
|
+
pageCount: affected?.length
|
|
6584
|
+
}];
|
|
6585
|
+
},
|
|
6160
6586
|
visible_date_signal: (c, pages) => {
|
|
6161
6587
|
if (c.score >= 10) return [];
|
|
6162
6588
|
const impact = impactFromScore(c.score);
|