aeorank 3.0.3 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -24
- package/dist/browser.d.ts +3 -3
- package/dist/browser.js +429 -24
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +365 -20
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +429 -24
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +429 -24
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.cts
CHANGED
|
@@ -219,7 +219,7 @@ interface SitemapDateAnalysis {
|
|
|
219
219
|
declare function countRecentSitemapDates(sitemapText: string): SitemapDateAnalysis;
|
|
220
220
|
declare function extractRawDataSummary(data: SiteData): RawDataSummary;
|
|
221
221
|
/**
|
|
222
|
-
* Run all
|
|
222
|
+
* Run all 36 criteria checks using pre-fetched site data.
|
|
223
223
|
* All functions are synchronous (no HTTP calls) - data was already fetched.
|
|
224
224
|
*/
|
|
225
225
|
declare function auditSiteFromData(data: SiteData): CriterionResult[];
|
|
@@ -231,7 +231,7 @@ declare function auditSite(targetUrl: string): Promise<CriterionResult[]>;
|
|
|
231
231
|
|
|
232
232
|
/**
|
|
233
233
|
* V2 Pillar Framework — 5-pillar scoring model.
|
|
234
|
-
* Maps all
|
|
234
|
+
* Maps all 36 criteria into pillars, computes sub-scores,
|
|
235
235
|
* provides client-friendly names, and calculates top-3 fixes.
|
|
236
236
|
*/
|
|
237
237
|
|
|
@@ -424,7 +424,7 @@ declare function analyzeAllPages(siteData: SiteData): PageReview[];
|
|
|
424
424
|
|
|
425
425
|
/**
|
|
426
426
|
* Per-page AEO scoring.
|
|
427
|
-
* Evaluates
|
|
427
|
+
* Evaluates 21 of 36 criteria that apply at individual page level.
|
|
428
428
|
* Produces a 0-75 AEO score per page (single-page cap at 75).
|
|
429
429
|
*/
|
|
430
430
|
|
package/dist/index.d.ts
CHANGED
|
@@ -219,7 +219,7 @@ interface SitemapDateAnalysis {
|
|
|
219
219
|
declare function countRecentSitemapDates(sitemapText: string): SitemapDateAnalysis;
|
|
220
220
|
declare function extractRawDataSummary(data: SiteData): RawDataSummary;
|
|
221
221
|
/**
|
|
222
|
-
* Run all
|
|
222
|
+
* Run all 36 criteria checks using pre-fetched site data.
|
|
223
223
|
* All functions are synchronous (no HTTP calls) - data was already fetched.
|
|
224
224
|
*/
|
|
225
225
|
declare function auditSiteFromData(data: SiteData): CriterionResult[];
|
|
@@ -231,7 +231,7 @@ declare function auditSite(targetUrl: string): Promise<CriterionResult[]>;
|
|
|
231
231
|
|
|
232
232
|
/**
|
|
233
233
|
* V2 Pillar Framework — 5-pillar scoring model.
|
|
234
|
-
* Maps all
|
|
234
|
+
* Maps all 36 criteria into pillars, computes sub-scores,
|
|
235
235
|
* provides client-friendly names, and calculates top-3 fixes.
|
|
236
236
|
*/
|
|
237
237
|
|
|
@@ -424,7 +424,7 @@ declare function analyzeAllPages(siteData: SiteData): PageReview[];
|
|
|
424
424
|
|
|
425
425
|
/**
|
|
426
426
|
* Per-page AEO scoring.
|
|
427
|
-
* Evaluates
|
|
427
|
+
* Evaluates 21 of 36 criteria that apply at individual page level.
|
|
428
428
|
* Produces a 0-75 AEO score per page (single-page cap at 75).
|
|
429
429
|
*/
|
|
430
430
|
|
package/dist/index.js
CHANGED
|
@@ -78,6 +78,75 @@ function detectParkedDomain(bodySnippet) {
|
|
|
78
78
|
return { isParked: false };
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
// src/duplicate-content.ts
|
|
82
|
+
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
|
|
83
|
+
var MIN_SUBSTANTIVE_WORDS = 15;
|
|
84
|
+
var MAX_METADATA_WORDS = 24;
|
|
85
|
+
var MAX_METADATA_LABEL_WORDS = 4;
|
|
86
|
+
function normalizeParagraphText(htmlFragment) {
|
|
87
|
+
return htmlFragment.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
|
|
88
|
+
}
|
|
89
|
+
function tokenize(text) {
|
|
90
|
+
return text.split(/\s+/).map((word) => word.replace(/^[^a-z0-9]+|[^a-z0-9]+$/gi, "")).filter((word) => word.length > 0);
|
|
91
|
+
}
|
|
92
|
+
function isBoilerplateParagraph(text, words) {
|
|
93
|
+
if (words < 20 && BOILERPLATE_PATTERNS.test(text)) return true;
|
|
94
|
+
if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
|
|
95
|
+
return false;
|
|
96
|
+
}
|
|
97
|
+
function isMetadataParagraph(text, words) {
|
|
98
|
+
const labelMatch = text.match(/^([^:]{1,60}):\s+/);
|
|
99
|
+
if (!labelMatch) return false;
|
|
100
|
+
const labelWords = tokenize(labelMatch[1]).length;
|
|
101
|
+
return labelWords > 0 && labelWords <= MAX_METADATA_LABEL_WORDS && words <= MAX_METADATA_WORDS;
|
|
102
|
+
}
|
|
103
|
+
function buildShinglesFromTokens(words, n = 4) {
|
|
104
|
+
const shingles = /* @__PURE__ */ new Set();
|
|
105
|
+
for (let i = 0; i <= words.length - n; i++) {
|
|
106
|
+
shingles.add(words.slice(i, i + n).join(" "));
|
|
107
|
+
}
|
|
108
|
+
return shingles;
|
|
109
|
+
}
|
|
110
|
+
function createParagraph(htmlFragment) {
|
|
111
|
+
const text = normalizeParagraphText(htmlFragment);
|
|
112
|
+
const words = tokenize(text);
|
|
113
|
+
if (words.length < MIN_SUBSTANTIVE_WORDS) return null;
|
|
114
|
+
if (isBoilerplateParagraph(text, words.length)) return null;
|
|
115
|
+
if (isMetadataParagraph(text, words.length)) return null;
|
|
116
|
+
const shingles = buildShinglesFromTokens(words);
|
|
117
|
+
if (shingles.size < 3) return null;
|
|
118
|
+
return { text, shingles };
|
|
119
|
+
}
|
|
120
|
+
function stripNonContentHtml(html) {
|
|
121
|
+
return html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
|
|
122
|
+
}
|
|
123
|
+
function extractDuplicateContentParagraphs(html) {
|
|
124
|
+
const cleaned = stripNonContentHtml(html);
|
|
125
|
+
const matches = cleaned.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
126
|
+
return matches.map(createParagraph).filter((paragraph) => paragraph !== null);
|
|
127
|
+
}
|
|
128
|
+
function extractDuplicateContentSections(html) {
|
|
129
|
+
const cleaned = stripNonContentHtml(html);
|
|
130
|
+
const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
|
|
131
|
+
const sections = [];
|
|
132
|
+
for (const part of parts) {
|
|
133
|
+
const headingMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
|
|
134
|
+
const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
|
|
135
|
+
const paragraphs = (part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || []).map(createParagraph).filter((paragraph) => paragraph !== null);
|
|
136
|
+
if (paragraphs.length > 0) sections.push({ heading, paragraphs });
|
|
137
|
+
}
|
|
138
|
+
return sections;
|
|
139
|
+
}
|
|
140
|
+
function shingleJaccardSimilarity(a, b) {
|
|
141
|
+
if (a.size === 0 && b.size === 0) return 0;
|
|
142
|
+
let intersection = 0;
|
|
143
|
+
for (const shingle of a) {
|
|
144
|
+
if (b.has(shingle)) intersection++;
|
|
145
|
+
}
|
|
146
|
+
const union = a.size + b.size - intersection;
|
|
147
|
+
return union === 0 ? 0 : intersection / union;
|
|
148
|
+
}
|
|
149
|
+
|
|
81
150
|
// src/site-crawler.ts
|
|
82
151
|
async function fetchText(url) {
|
|
83
152
|
try {
|
|
@@ -2558,6 +2627,186 @@ function checkImageContextAI(data) {
|
|
|
2558
2627
|
}
|
|
2559
2628
|
return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
|
|
2560
2629
|
}
|
|
2630
|
+
function findIntraPageDuplicates(html) {
|
|
2631
|
+
const sections = extractDuplicateContentSections(html);
|
|
2632
|
+
if (sections.length < 2) return [];
|
|
2633
|
+
const pairs = [];
|
|
2634
|
+
for (let i = 0; i < sections.length; i++) {
|
|
2635
|
+
for (let j = i + 1; j < sections.length; j++) {
|
|
2636
|
+
let found = false;
|
|
2637
|
+
for (const pA of sections[i].paragraphs) {
|
|
2638
|
+
if (found) break;
|
|
2639
|
+
for (const pB of sections[j].paragraphs) {
|
|
2640
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
2641
|
+
if (sim > 0.4) {
|
|
2642
|
+
pairs.push({
|
|
2643
|
+
headingA: sections[i].heading,
|
|
2644
|
+
headingB: sections[j].heading,
|
|
2645
|
+
similarity: Math.round(sim * 100),
|
|
2646
|
+
sample: pA.text.slice(0, 80)
|
|
2647
|
+
});
|
|
2648
|
+
found = true;
|
|
2649
|
+
break;
|
|
2650
|
+
}
|
|
2651
|
+
}
|
|
2652
|
+
}
|
|
2653
|
+
}
|
|
2654
|
+
}
|
|
2655
|
+
return pairs;
|
|
2656
|
+
}
|
|
2657
|
+
function checkDuplicateContent(data) {
|
|
2658
|
+
const findings = [];
|
|
2659
|
+
const pages = [];
|
|
2660
|
+
if (data.homepage) {
|
|
2661
|
+
pages.push({ html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` });
|
|
2662
|
+
}
|
|
2663
|
+
if (data.blogSample) {
|
|
2664
|
+
for (const page of data.blogSample) {
|
|
2665
|
+
pages.push({ html: page.text, url: page.finalUrl || "" });
|
|
2666
|
+
}
|
|
2667
|
+
}
|
|
2668
|
+
if (pages.length === 0) {
|
|
2669
|
+
findings.push({ severity: "critical", detail: "No pages available for duplicate content analysis" });
|
|
2670
|
+
return { criterion: "duplicate_content", criterion_label: "Duplicate Content Blocks", score: 0, status: "fail", findings, fix_priority: "P1" };
|
|
2671
|
+
}
|
|
2672
|
+
let totalDupPages = 0;
|
|
2673
|
+
let totalDupPairs = 0;
|
|
2674
|
+
const dupDetails = [];
|
|
2675
|
+
for (const page of pages) {
|
|
2676
|
+
const pairs = findIntraPageDuplicates(page.html);
|
|
2677
|
+
if (pairs.length > 0) {
|
|
2678
|
+
totalDupPages++;
|
|
2679
|
+
totalDupPairs += pairs.length;
|
|
2680
|
+
dupDetails.push({ url: page.url, pairs });
|
|
2681
|
+
}
|
|
2682
|
+
}
|
|
2683
|
+
const dupRatio = totalDupPages / pages.length;
|
|
2684
|
+
let score;
|
|
2685
|
+
if (totalDupPairs === 0) {
|
|
2686
|
+
score = 10;
|
|
2687
|
+
findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no duplicate content blocks detected` });
|
|
2688
|
+
} else if (dupRatio <= 0.05 && totalDupPairs <= 2) {
|
|
2689
|
+
score = 9;
|
|
2690
|
+
findings.push({ severity: "info", detail: `${totalDupPairs} duplicate block pair(s) on ${totalDupPages} page(s) - minor` });
|
|
2691
|
+
} else if (dupRatio <= 0.1) {
|
|
2692
|
+
score = 7;
|
|
2693
|
+
findings.push({ severity: "low", detail: `${totalDupPairs} duplicate block pair(s) across ${totalDupPages} page(s)`, fix: "Rewrite duplicate sections to provide unique content in each" });
|
|
2694
|
+
} else if (dupRatio <= 0.2) {
|
|
2695
|
+
score = 5;
|
|
2696
|
+
findings.push({ severity: "medium", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) contain duplicate content blocks`, fix: "Rewrite or remove repeated text blocks - LLMs may flag this as low-quality content" });
|
|
2697
|
+
} else if (dupRatio <= 0.4) {
|
|
2698
|
+
score = 3;
|
|
2699
|
+
findings.push({ severity: "medium", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) have significant duplicate content`, fix: "Widespread duplicate blocks reduce content authority - rewrite each section with unique angles" });
|
|
2700
|
+
} else {
|
|
2701
|
+
score = 0;
|
|
2702
|
+
findings.push({ severity: "high", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) contain duplicate content blocks`, fix: "Severe content duplication across the site - LLMs will likely reduce citation authority" });
|
|
2703
|
+
}
|
|
2704
|
+
for (const dup of dupDetails.slice(0, 3)) {
|
|
2705
|
+
const shortUrl = dup.url.slice(0, 60);
|
|
2706
|
+
for (const pair of dup.pairs.slice(0, 2)) {
|
|
2707
|
+
findings.push({
|
|
2708
|
+
severity: "low",
|
|
2709
|
+
detail: `${shortUrl}: '${pair.headingA}' and '${pair.headingB}' share ${pair.similarity}% similar text ("${pair.sample}...")`,
|
|
2710
|
+
fix: `Rewrite one of these sections to eliminate duplicate content`
|
|
2711
|
+
});
|
|
2712
|
+
}
|
|
2713
|
+
}
|
|
2714
|
+
return { criterion: "duplicate_content", criterion_label: "Duplicate Content Blocks", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
|
|
2715
|
+
}
|
|
2716
|
+
function checkCrossPageDuplication(data) {
|
|
2717
|
+
const findings = [];
|
|
2718
|
+
const pages = [];
|
|
2719
|
+
if (data.homepage) {
|
|
2720
|
+
pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractDuplicateContentParagraphs(data.homepage.text) });
|
|
2721
|
+
}
|
|
2722
|
+
if (data.blogSample) {
|
|
2723
|
+
for (const page of data.blogSample) {
|
|
2724
|
+
pages.push({ url: page.finalUrl || "", paragraphs: extractDuplicateContentParagraphs(page.text) });
|
|
2725
|
+
}
|
|
2726
|
+
}
|
|
2727
|
+
if (pages.length <= 1) {
|
|
2728
|
+
findings.push({ severity: "info", detail: "Not enough pages to assess cross-page duplication" });
|
|
2729
|
+
return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score: 5, status: "partial", findings, fix_priority: "P3" };
|
|
2730
|
+
}
|
|
2731
|
+
const paragraphPageCount = /* @__PURE__ */ new Map();
|
|
2732
|
+
for (const page of pages) {
|
|
2733
|
+
const seen = /* @__PURE__ */ new Set();
|
|
2734
|
+
for (const p of page.paragraphs) {
|
|
2735
|
+
const fp = [...p.shingles].slice(0, 5).join("|");
|
|
2736
|
+
if (!seen.has(fp)) {
|
|
2737
|
+
seen.add(fp);
|
|
2738
|
+
paragraphPageCount.set(fp, (paragraphPageCount.get(fp) || 0) + 1);
|
|
2739
|
+
}
|
|
2740
|
+
}
|
|
2741
|
+
}
|
|
2742
|
+
const boilerplateThreshold = Math.max(3, pages.length * 0.4);
|
|
2743
|
+
const siteBoilerprints = /* @__PURE__ */ new Set();
|
|
2744
|
+
for (const [fp, count] of paragraphPageCount) {
|
|
2745
|
+
if (count >= boilerplateThreshold) siteBoilerprints.add(fp);
|
|
2746
|
+
}
|
|
2747
|
+
const crossDupPairs = [];
|
|
2748
|
+
for (let i = 0; i < pages.length; i++) {
|
|
2749
|
+
for (let j = i + 1; j < pages.length; j++) {
|
|
2750
|
+
let dupCount = 0;
|
|
2751
|
+
let sample = "";
|
|
2752
|
+
for (const pA of pages[i].paragraphs) {
|
|
2753
|
+
const fpA = [...pA.shingles].slice(0, 5).join("|");
|
|
2754
|
+
if (siteBoilerprints.has(fpA)) continue;
|
|
2755
|
+
for (const pB of pages[j].paragraphs) {
|
|
2756
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
2757
|
+
if (sim > 0.4) {
|
|
2758
|
+
dupCount++;
|
|
2759
|
+
if (!sample) sample = pA.text.slice(0, 80);
|
|
2760
|
+
break;
|
|
2761
|
+
}
|
|
2762
|
+
}
|
|
2763
|
+
}
|
|
2764
|
+
if (dupCount >= 2) {
|
|
2765
|
+
crossDupPairs.push({
|
|
2766
|
+
urlA: pages[i].url.slice(0, 60),
|
|
2767
|
+
urlB: pages[j].url.slice(0, 60),
|
|
2768
|
+
dupCount,
|
|
2769
|
+
sample
|
|
2770
|
+
});
|
|
2771
|
+
}
|
|
2772
|
+
}
|
|
2773
|
+
}
|
|
2774
|
+
const affectedUrls = /* @__PURE__ */ new Set();
|
|
2775
|
+
for (const pair of crossDupPairs) {
|
|
2776
|
+
affectedUrls.add(pair.urlA);
|
|
2777
|
+
affectedUrls.add(pair.urlB);
|
|
2778
|
+
}
|
|
2779
|
+
const affectedRatio = pages.length > 0 ? affectedUrls.size / pages.length : 0;
|
|
2780
|
+
const totalDupParagraphs = crossDupPairs.reduce((s, p) => s + p.dupCount, 0);
|
|
2781
|
+
let score;
|
|
2782
|
+
if (crossDupPairs.length === 0) {
|
|
2783
|
+
score = 10;
|
|
2784
|
+
findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no cross-page content duplication detected` });
|
|
2785
|
+
} else if (affectedRatio <= 0.05 && totalDupParagraphs <= 4) {
|
|
2786
|
+
score = 9;
|
|
2787
|
+
findings.push({ severity: "info", detail: `${totalDupParagraphs} shared paragraph(s) across ${affectedUrls.size} page(s) - minor` });
|
|
2788
|
+
} else if (affectedRatio <= 0.1) {
|
|
2789
|
+
score = 7;
|
|
2790
|
+
findings.push({ severity: "low", detail: `${totalDupParagraphs} shared paragraphs across ${affectedUrls.size} pages`, fix: "Rewrite shared content so each page provides a unique perspective" });
|
|
2791
|
+
} else if (affectedRatio <= 0.2) {
|
|
2792
|
+
score = 5;
|
|
2793
|
+
findings.push({ severity: "medium", detail: `${affectedUrls.size} pages (${Math.round(affectedRatio * 100)}%) share duplicate paragraphs`, fix: "Significant cross-page duplication - AI engines may only index one version" });
|
|
2794
|
+
} else if (affectedRatio <= 0.4) {
|
|
2795
|
+
score = 3;
|
|
2796
|
+
findings.push({ severity: "medium", detail: `${affectedUrls.size} pages (${Math.round(affectedRatio * 100)}%) contain shared content blocks`, fix: "Widespread copy-paste content across pages reduces overall site authority" });
|
|
2797
|
+
} else {
|
|
2798
|
+
score = 0;
|
|
2799
|
+
findings.push({ severity: "high", detail: `${affectedUrls.size} pages (${Math.round(affectedRatio * 100)}%) share duplicate content`, fix: "Severe cross-page duplication - AI engines will likely ignore redundant pages entirely" });
|
|
2800
|
+
}
|
|
2801
|
+
for (const pair of crossDupPairs.slice(0, 3)) {
|
|
2802
|
+
findings.push({
|
|
2803
|
+
severity: "low",
|
|
2804
|
+
detail: `${pair.dupCount} shared paragraph(s): ${pair.urlA} \u2194 ${pair.urlB} ("${pair.sample}...")`,
|
|
2805
|
+
fix: "Rewrite shared paragraphs so each page has unique content"
|
|
2806
|
+
});
|
|
2807
|
+
}
|
|
2808
|
+
return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
|
|
2809
|
+
}
|
|
2561
2810
|
function auditSiteFromData(data) {
|
|
2562
2811
|
const topicCoherence = checkTopicCoherence(data);
|
|
2563
2812
|
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
@@ -2596,7 +2845,10 @@ function auditSiteFromData(data) {
|
|
|
2596
2845
|
checkEvidencePackaging(data),
|
|
2597
2846
|
checkEntityDisambiguation(data),
|
|
2598
2847
|
checkExtractionFriction(data),
|
|
2599
|
-
checkImageContextAI(data)
|
|
2848
|
+
checkImageContextAI(data),
|
|
2849
|
+
// V3 criteria (#35-#36)
|
|
2850
|
+
checkDuplicateContent(data),
|
|
2851
|
+
checkCrossPageDuplication(data)
|
|
2600
2852
|
];
|
|
2601
2853
|
}
|
|
2602
2854
|
async function auditSite(targetUrl) {
|
|
@@ -2620,11 +2872,11 @@ var WEIGHTS = {
|
|
|
2620
2872
|
// Information density per page
|
|
2621
2873
|
direct_answer_density: 0.05,
|
|
2622
2874
|
// Direct answers to queries
|
|
2623
|
-
qa_content_format: 0.
|
|
2875
|
+
qa_content_format: 0.04,
|
|
2624
2876
|
// Answer-shaped content structure
|
|
2625
|
-
query_answer_alignment: 0.
|
|
2877
|
+
query_answer_alignment: 0.04,
|
|
2626
2878
|
// Relevance to actual AI queries
|
|
2627
|
-
faq_section: 0.
|
|
2879
|
+
faq_section: 0.03,
|
|
2628
2880
|
// Structured Q&A pairs
|
|
2629
2881
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2630
2882
|
// HOW easily AI engines can extract and trust your content.
|
|
@@ -2672,8 +2924,13 @@ var WEIGHTS = {
|
|
|
2672
2924
|
// Clear entity boundaries
|
|
2673
2925
|
extraction_friction: 0.02,
|
|
2674
2926
|
// Sentence length, voice, jargon
|
|
2675
|
-
image_context_ai: 0.01
|
|
2927
|
+
image_context_ai: 0.01,
|
|
2676
2928
|
// Figure/figcaption, alt text quality
|
|
2929
|
+
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2930
|
+
duplicate_content: 0.05,
|
|
2931
|
+
// Duplicate text blocks within pages
|
|
2932
|
+
cross_page_duplication: 0.03
|
|
2933
|
+
// Same paragraphs copied across pages
|
|
2677
2934
|
};
|
|
2678
2935
|
function calculateOverallScore(criteria) {
|
|
2679
2936
|
let totalWeight = 0;
|
|
@@ -2803,7 +3060,9 @@ var PILLARS = {
|
|
|
2803
3060
|
"fact_density",
|
|
2804
3061
|
"citation_ready_writing",
|
|
2805
3062
|
"answer_first_placement",
|
|
2806
|
-
"evidence_packaging"
|
|
3063
|
+
"evidence_packaging",
|
|
3064
|
+
"duplicate_content",
|
|
3065
|
+
"cross_page_duplication"
|
|
2807
3066
|
],
|
|
2808
3067
|
"Content Structure": [
|
|
2809
3068
|
"direct_answer_density",
|
|
@@ -2868,6 +3127,8 @@ var CLIENT_NAMES = {
|
|
|
2868
3127
|
image_context_ai: "Image Context for AI",
|
|
2869
3128
|
schema_coverage: "Schema Coverage",
|
|
2870
3129
|
speakable_schema: "Speakable Schema",
|
|
3130
|
+
duplicate_content: "Duplicate Content Blocks",
|
|
3131
|
+
cross_page_duplication: "Cross-Page Duplicate Content",
|
|
2871
3132
|
content_cannibalization: "Content Cannibalization",
|
|
2872
3133
|
llms_txt: "llms.txt File",
|
|
2873
3134
|
robots_txt: "robots.txt for AI",
|
|
@@ -2885,10 +3146,12 @@ var PILLAR_WEIGHTS = {
|
|
|
2885
3146
|
citation_ready_writing: 0.04,
|
|
2886
3147
|
answer_first_placement: 0.03,
|
|
2887
3148
|
evidence_packaging: 0.03,
|
|
3149
|
+
duplicate_content: 0.05,
|
|
3150
|
+
cross_page_duplication: 0.03,
|
|
2888
3151
|
direct_answer_density: 0.05,
|
|
2889
|
-
qa_content_format: 0.
|
|
2890
|
-
query_answer_alignment: 0.
|
|
2891
|
-
faq_section: 0.
|
|
3152
|
+
qa_content_format: 0.04,
|
|
3153
|
+
query_answer_alignment: 0.04,
|
|
3154
|
+
faq_section: 0.03,
|
|
2892
3155
|
table_list_extractability: 0.03,
|
|
2893
3156
|
definition_patterns: 0.02,
|
|
2894
3157
|
entity_disambiguation: 0.02,
|
|
@@ -2921,6 +3184,8 @@ var CRITERION_EFFORT = {
|
|
|
2921
3184
|
citation_ready_writing: "Medium",
|
|
2922
3185
|
answer_first_placement: "Medium",
|
|
2923
3186
|
evidence_packaging: "Medium",
|
|
3187
|
+
duplicate_content: "Medium",
|
|
3188
|
+
cross_page_duplication: "Medium",
|
|
2924
3189
|
direct_answer_density: "Medium",
|
|
2925
3190
|
qa_content_format: "Medium",
|
|
2926
3191
|
query_answer_alignment: "Medium",
|
|
@@ -2976,6 +3241,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
2976
3241
|
image_context_ai: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
|
|
2977
3242
|
schema_coverage: "Extend structured data to inner pages (articles, services, products).",
|
|
2978
3243
|
speakable_schema: "Add SpeakableSpecification schema for voice assistant compatibility.",
|
|
3244
|
+
duplicate_content: "Rewrite duplicate text blocks so each section provides unique value.",
|
|
3245
|
+
cross_page_duplication: "Rewrite shared paragraphs across pages so each page has unique content.",
|
|
2979
3246
|
content_cannibalization: "Consolidate overlapping pages or differentiate titles and H1 headings.",
|
|
2980
3247
|
llms_txt: "Create a /llms.txt file describing your site for AI engines.",
|
|
2981
3248
|
robots_txt: "Update robots.txt to explicitly allow AI crawlers.",
|
|
@@ -3071,7 +3338,9 @@ var CRITERION_LABELS = {
|
|
|
3071
3338
|
"Evidence Packaging": "Evidence Packaging",
|
|
3072
3339
|
"Entity Disambiguation": "Entity Disambiguation",
|
|
3073
3340
|
"Extraction Friction Score": "Extraction Friction Score",
|
|
3074
|
-
"Image Context for AI": "Image Context for AI"
|
|
3341
|
+
"Image Context for AI": "Image Context for AI",
|
|
3342
|
+
"Duplicate Content Blocks": "Duplicate Content Blocks",
|
|
3343
|
+
"Cross-Page Duplicate Content": "Cross-Page Duplicate Content"
|
|
3075
3344
|
};
|
|
3076
3345
|
function scoreToStatus(score) {
|
|
3077
3346
|
if (score === 0) return "MISSING";
|
|
@@ -3166,9 +3435,9 @@ var CRITERION_WEIGHTS = {
|
|
|
3166
3435
|
content_depth: 0.07,
|
|
3167
3436
|
fact_density: 0.06,
|
|
3168
3437
|
direct_answer_density: 0.05,
|
|
3169
|
-
qa_content_format: 0.
|
|
3170
|
-
query_answer_alignment: 0.
|
|
3171
|
-
faq_section: 0.
|
|
3438
|
+
qa_content_format: 0.04,
|
|
3439
|
+
query_answer_alignment: 0.04,
|
|
3440
|
+
faq_section: 0.03,
|
|
3172
3441
|
// Content Organization (~30%)
|
|
3173
3442
|
entity_consistency: 0.05,
|
|
3174
3443
|
internal_linking: 0.04,
|
|
@@ -3197,7 +3466,10 @@ var CRITERION_WEIGHTS = {
|
|
|
3197
3466
|
evidence_packaging: 0.03,
|
|
3198
3467
|
entity_disambiguation: 0.02,
|
|
3199
3468
|
extraction_friction: 0.02,
|
|
3200
|
-
image_context_ai: 0.01
|
|
3469
|
+
image_context_ai: 0.01,
|
|
3470
|
+
// V3 Criteria
|
|
3471
|
+
duplicate_content: 0.05,
|
|
3472
|
+
cross_page_duplication: 0.03
|
|
3201
3473
|
};
|
|
3202
3474
|
var OPPORTUNITY_TEMPLATES = {
|
|
3203
3475
|
llms_txt: {
|
|
@@ -3320,6 +3592,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3320
3592
|
effort: "Medium",
|
|
3321
3593
|
description: "Ensure every question-format heading (H2/H3) is followed by a direct answer paragraph. This pattern is ideal for AI engine snippet extraction."
|
|
3322
3594
|
},
|
|
3595
|
+
duplicate_content: {
|
|
3596
|
+
name: "Fix Duplicate Content Blocks",
|
|
3597
|
+
effort: "Medium",
|
|
3598
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality or thin content, reducing citation authority. Rewrite duplicate blocks with unique angles."
|
|
3599
|
+
},
|
|
3600
|
+
cross_page_duplication: {
|
|
3601
|
+
name: "Eliminate Cross-Page Duplicate Content",
|
|
3602
|
+
effort: "Medium",
|
|
3603
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version and ignore the rest. Rewrite shared content so each page offers a unique perspective."
|
|
3604
|
+
},
|
|
3323
3605
|
content_cannibalization: {
|
|
3324
3606
|
name: "Resolve Content Cannibalization",
|
|
3325
3607
|
effort: "Medium",
|
|
@@ -3728,9 +4010,9 @@ var PAGE_CRITERIA = {
|
|
|
3728
4010
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
3729
4011
|
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
3730
4012
|
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
3731
|
-
qa_content_format: { weight: 0.
|
|
3732
|
-
query_answer_alignment: { weight: 0.
|
|
3733
|
-
faq_section: { weight: 0.
|
|
4013
|
+
qa_content_format: { weight: 0.04, label: "Q&A Content Format" },
|
|
4014
|
+
query_answer_alignment: { weight: 0.04, label: "Query-Answer Alignment" },
|
|
4015
|
+
faq_section: { weight: 0.03, label: "FAQ Section Content" },
|
|
3734
4016
|
// Content Organization
|
|
3735
4017
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3736
4018
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
@@ -3747,7 +4029,8 @@ var PAGE_CRITERIA = {
|
|
|
3747
4029
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
3748
4030
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
3749
4031
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
3750
|
-
image_context_ai: { weight: 0.01, label: "Image Context for AI" }
|
|
4032
|
+
image_context_ai: { weight: 0.01, label: "Image Context for AI" },
|
|
4033
|
+
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
3751
4034
|
};
|
|
3752
4035
|
function extractJsonLdBlocks(html) {
|
|
3753
4036
|
const blocks = [];
|
|
@@ -4196,6 +4479,48 @@ function scoreImageContextAI(html) {
|
|
|
4196
4479
|
if (contextualImages.length > 0) score += 3;
|
|
4197
4480
|
return cap(score, 10);
|
|
4198
4481
|
}
|
|
4482
|
+
function scoreDuplicateContent(html) {
|
|
4483
|
+
return scoreDuplicateContentDetailed(html).score;
|
|
4484
|
+
}
|
|
4485
|
+
function scoreDuplicateContentDetailed(html) {
|
|
4486
|
+
const sections = extractDuplicateContentSections(html);
|
|
4487
|
+
if (sections.length < 2) return { score: 10, duplicates: [] };
|
|
4488
|
+
const totalParagraphs = sections.reduce((sum, s) => sum + s.paragraphs.length, 0);
|
|
4489
|
+
const duplicates = [];
|
|
4490
|
+
let dupParagraphCount = 0;
|
|
4491
|
+
for (let i = 0; i < sections.length; i++) {
|
|
4492
|
+
for (let j = i + 1; j < sections.length; j++) {
|
|
4493
|
+
for (const pA of sections[i].paragraphs) {
|
|
4494
|
+
for (const pB of sections[j].paragraphs) {
|
|
4495
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
4496
|
+
if (sim > 0.4) {
|
|
4497
|
+
dupParagraphCount++;
|
|
4498
|
+
duplicates.push({
|
|
4499
|
+
headingA: sections[i].heading,
|
|
4500
|
+
headingB: sections[j].heading,
|
|
4501
|
+
similarity: Math.round(sim * 100),
|
|
4502
|
+
sample: pA.text.slice(0, 80)
|
|
4503
|
+
});
|
|
4504
|
+
break;
|
|
4505
|
+
}
|
|
4506
|
+
}
|
|
4507
|
+
}
|
|
4508
|
+
}
|
|
4509
|
+
}
|
|
4510
|
+
if (dupParagraphCount === 0) return { score: 10, duplicates: [] };
|
|
4511
|
+
const dupRatio = totalParagraphs > 0 ? dupParagraphCount / totalParagraphs : 0;
|
|
4512
|
+
let score;
|
|
4513
|
+
if (dupParagraphCount === 1 && dupRatio <= 0.05) {
|
|
4514
|
+
score = 6;
|
|
4515
|
+
} else if (dupParagraphCount === 1) {
|
|
4516
|
+
score = 4;
|
|
4517
|
+
} else if (dupParagraphCount === 2) {
|
|
4518
|
+
score = 2;
|
|
4519
|
+
} else {
|
|
4520
|
+
score = 0;
|
|
4521
|
+
}
|
|
4522
|
+
return { score, duplicates };
|
|
4523
|
+
}
|
|
4199
4524
|
var SCORING_FUNCTIONS = {
|
|
4200
4525
|
schema_markup: scoreSchemaMarkup,
|
|
4201
4526
|
qa_content_format: scoreQAFormat,
|
|
@@ -4216,7 +4541,8 @@ var SCORING_FUNCTIONS = {
|
|
|
4216
4541
|
evidence_packaging: scoreEvidencePackaging,
|
|
4217
4542
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4218
4543
|
extraction_friction: scoreExtractionFriction,
|
|
4219
|
-
image_context_ai: scoreImageContextAI
|
|
4544
|
+
image_context_ai: scoreImageContextAI,
|
|
4545
|
+
duplicate_content: scoreDuplicateContent
|
|
4220
4546
|
};
|
|
4221
4547
|
function scorePage(html, url) {
|
|
4222
4548
|
let totalWeight = 0;
|
|
@@ -4230,6 +4556,11 @@ function scorePage(html, url) {
|
|
|
4230
4556
|
totalWeight += weight;
|
|
4231
4557
|
}
|
|
4232
4558
|
let aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
|
|
4559
|
+
const dupScore = criterionScores.find((c) => c.criterion === "duplicate_content")?.score ?? 10;
|
|
4560
|
+
if (dupScore <= 6) {
|
|
4561
|
+
const dupCap = 35 + dupScore * 5;
|
|
4562
|
+
aeoScore = Math.min(aeoScore, dupCap);
|
|
4563
|
+
}
|
|
4233
4564
|
const scoreCapped = aeoScore > 75;
|
|
4234
4565
|
if (scoreCapped) aeoScore = 75;
|
|
4235
4566
|
return { aeoScore, criterionScores, scoreCapped };
|
|
@@ -4449,6 +4780,15 @@ function checkHasCitationReadyContent(html) {
|
|
|
4449
4780
|
}
|
|
4450
4781
|
return null;
|
|
4451
4782
|
}
|
|
4783
|
+
function checkDuplicateContentBlocks(html) {
|
|
4784
|
+
const { score, duplicates } = scoreDuplicateContentDetailed(html);
|
|
4785
|
+
if (score <= 6 && duplicates.length > 0) {
|
|
4786
|
+
const first = duplicates[0];
|
|
4787
|
+
const label = duplicates.length === 1 ? `Duplicate content: '${first.headingA}' and '${first.headingB}' share ${first.similarity}% similar text ("${first.sample}...")` : `${duplicates.length} duplicate blocks found (e.g. '${first.headingA}' and '${first.headingB}' \u2014 "${first.sample}...")`;
|
|
4788
|
+
return { check: "duplicate-content", label, severity: score <= 3 ? "error" : "warning" };
|
|
4789
|
+
}
|
|
4790
|
+
return null;
|
|
4791
|
+
}
|
|
4452
4792
|
function analyzePage(html, url, category) {
|
|
4453
4793
|
const title = extractTitle(html);
|
|
4454
4794
|
const textContent = getTextContent2(html);
|
|
@@ -4467,7 +4807,8 @@ function analyzePage(html, url, category) {
|
|
|
4467
4807
|
checkImagesMissingAlt(html),
|
|
4468
4808
|
checkNoInternalLinks(html, url),
|
|
4469
4809
|
checkNoAnswerBlock(html),
|
|
4470
|
-
checkNoEvidence(html, url)
|
|
4810
|
+
checkNoEvidence(html, url),
|
|
4811
|
+
checkDuplicateContentBlocks(html)
|
|
4471
4812
|
];
|
|
4472
4813
|
for (const result of issueChecks) {
|
|
4473
4814
|
if (result) issues.push(result);
|
|
@@ -4830,9 +5171,9 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4830
5171
|
content_depth: 0.07,
|
|
4831
5172
|
fact_density: 0.06,
|
|
4832
5173
|
direct_answer_density: 0.05,
|
|
4833
|
-
qa_content_format: 0.
|
|
4834
|
-
query_answer_alignment: 0.
|
|
4835
|
-
faq_section: 0.
|
|
5174
|
+
qa_content_format: 0.04,
|
|
5175
|
+
query_answer_alignment: 0.04,
|
|
5176
|
+
faq_section: 0.03,
|
|
4836
5177
|
// Content Organization (~30%)
|
|
4837
5178
|
entity_consistency: 0.05,
|
|
4838
5179
|
internal_linking: 0.04,
|
|
@@ -4846,6 +5187,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4846
5187
|
clean_html: 0.02,
|
|
4847
5188
|
// Technical Plumbing (~15%)
|
|
4848
5189
|
content_cannibalization: 0.02,
|
|
5190
|
+
duplicate_content: 0.05,
|
|
5191
|
+
cross_page_duplication: 0.03,
|
|
4849
5192
|
llms_txt: 0.02,
|
|
4850
5193
|
robots_txt: 0.02,
|
|
4851
5194
|
content_velocity: 0.02,
|
|
@@ -4890,7 +5233,9 @@ var PHASE_CONFIG = [
|
|
|
4890
5233
|
"citation_ready_writing",
|
|
4891
5234
|
"answer_first_placement",
|
|
4892
5235
|
"evidence_packaging",
|
|
4893
|
-
"entity_disambiguation"
|
|
5236
|
+
"entity_disambiguation",
|
|
5237
|
+
"duplicate_content",
|
|
5238
|
+
"cross_page_duplication"
|
|
4894
5239
|
]
|
|
4895
5240
|
},
|
|
4896
5241
|
{
|
|
@@ -5770,6 +6115,66 @@ Summarization: yes`,
|
|
|
5770
6115
|
}
|
|
5771
6116
|
return fixes;
|
|
5772
6117
|
},
|
|
6118
|
+
duplicate_content: (c, pages) => {
|
|
6119
|
+
if (c.score >= 10) return [];
|
|
6120
|
+
const impact = impactFromScore(c.score);
|
|
6121
|
+
const effort = effortForCriterion("duplicate_content", c.score);
|
|
6122
|
+
const affected = getAffectedPages("duplicate_content", pages);
|
|
6123
|
+
const sectionPairs = c.findings.filter((f) => f.detail.includes("' and '")).map((f) => {
|
|
6124
|
+
const match = f.detail.match(/'([^']+)' and '([^']+)'/);
|
|
6125
|
+
return match ? { a: match[1], b: match[2] } : null;
|
|
6126
|
+
}).filter(Boolean);
|
|
6127
|
+
const steps = [
|
|
6128
|
+
"Identify sections with duplicate or near-identical text",
|
|
6129
|
+
"Rewrite each section to provide a unique angle on the topic",
|
|
6130
|
+
"Ensure each heading section adds new information for the reader"
|
|
6131
|
+
];
|
|
6132
|
+
if (sectionPairs.length > 0) {
|
|
6133
|
+
const pair = sectionPairs[0];
|
|
6134
|
+
steps.unshift(`Start with '${pair.a}' and '${pair.b}' which share similar text`);
|
|
6135
|
+
}
|
|
6136
|
+
return [{
|
|
6137
|
+
id: "fix-duplicate-content",
|
|
6138
|
+
criterion: c.criterion_label,
|
|
6139
|
+
criterionId: c.criterion,
|
|
6140
|
+
title: "Fix duplicate content blocks",
|
|
6141
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality content, reducing the authority of the page.",
|
|
6142
|
+
impact,
|
|
6143
|
+
effort,
|
|
6144
|
+
impactScore: 0,
|
|
6145
|
+
category: "content",
|
|
6146
|
+
steps,
|
|
6147
|
+
successCriteria: "Each section within a page provides unique content",
|
|
6148
|
+
affectedPages: affected,
|
|
6149
|
+
pageCount: affected?.length
|
|
6150
|
+
}];
|
|
6151
|
+
},
|
|
6152
|
+
cross_page_duplication: (c, pages) => {
|
|
6153
|
+
if (c.score >= 10) return [];
|
|
6154
|
+
const impact = impactFromScore(c.score);
|
|
6155
|
+
const effort = effortForCriterion("cross_page_duplication", c.score);
|
|
6156
|
+
const affected = getAffectedPages("cross_page_duplication", pages);
|
|
6157
|
+
return [{
|
|
6158
|
+
id: "fix-cross-page-duplication",
|
|
6159
|
+
criterion: c.criterion_label,
|
|
6160
|
+
criterionId: c.criterion,
|
|
6161
|
+
title: "Eliminate cross-page duplicate content",
|
|
6162
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version, wasting the others.",
|
|
6163
|
+
impact,
|
|
6164
|
+
effort,
|
|
6165
|
+
impactScore: 0,
|
|
6166
|
+
category: "content",
|
|
6167
|
+
steps: [
|
|
6168
|
+
"Identify paragraphs that are copy-pasted across multiple pages",
|
|
6169
|
+
"Rewrite each instance to provide a unique angle relevant to that page",
|
|
6170
|
+
"Move truly shared content to a single resource page and link to it",
|
|
6171
|
+
"Use canonical tags if pages must share content"
|
|
6172
|
+
],
|
|
6173
|
+
successCriteria: "Each page has unique body content with no copy-pasted paragraphs",
|
|
6174
|
+
affectedPages: affected,
|
|
6175
|
+
pageCount: affected?.length
|
|
6176
|
+
}];
|
|
6177
|
+
},
|
|
5773
6178
|
visible_date_signal: (c, pages) => {
|
|
5774
6179
|
if (c.score >= 10) return [];
|
|
5775
6180
|
const impact = impactFromScore(c.score);
|