aeorank 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -23
- package/dist/browser.d.ts +3 -3
- package/dist/browser.js +450 -24
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +386 -20
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +450 -24
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +450 -24
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/browser.js
CHANGED
|
@@ -2558,6 +2558,234 @@ function checkImageContextAI(data) {
|
|
|
2558
2558
|
}
|
|
2559
2559
|
return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
|
|
2560
2560
|
}
|
|
2561
|
+
// Call-to-action and legal-footer phrases; a short paragraph containing one is treated as boilerplate.
var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
/**
 * Decide whether a paragraph is site boilerplate rather than real content.
 *
 * A paragraph counts as boilerplate when it is short AND matches a known
 * phrase: under 20 whitespace-separated words with a call-to-action/legal
 * phrase, or under 30 words with a cookie/GDPR/consent/opt-out mention.
 *
 * @param {string} text - Paragraph text.
 * @returns {boolean} true when the paragraph should be ignored as boilerplate.
 */
function isBoilerplateParagraph(text) {
  const wordCount = text.split(/\s+/).length;
  const isShortCallToAction = wordCount < 20 && BOILERPLATE_RE.test(text);
  if (isShortCallToAction) return true;
  // `opt.out` deliberately uses `.` so "opt-out", "opt out" and "opt_out" all match.
  const mentionsConsent = /\b(cookie|gdpr|consent|opt.out)\b/i.test(text);
  return mentionsConsent && wordCount < 30;
}
|
|
2568
|
+
/**
 * Build the set of word n-grams ("shingles") for a piece of text.
 *
 * Tokens are produced by splitting on whitespace; tokens of length 0 or 1
 * are discarded before the sliding window is applied.
 *
 * @param {string} text - Text to shingle.
 * @param {number} [n=4] - Number of consecutive tokens per shingle.
 * @returns {Set<string>} Distinct shingles, each joined with single spaces.
 */
function toShingles(text, n = 4) {
  const tokens = text.split(/\s+/).filter((token) => token.length > 1);
  const result = new Set();
  let start = 0;
  while (start <= tokens.length - n) {
    const gram = tokens.slice(start, start + n);
    result.add(gram.join(" "));
    start += 1;
  }
  return result;
}
|
|
2576
|
+
/**
 * Jaccard similarity of two shingle sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} a - First shingle set.
 * @param {Set<string>} b - Second shingle set.
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function shingleSimilarity(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  let shared = 0;
  a.forEach((shingle) => {
    if (b.has(shingle)) shared += 1;
  });
  const unionSize = a.size + b.size - shared;
  if (unionSize === 0) return 0;
  return shared / unionSize;
}
|
|
2585
|
+
/**
 * Extract the content paragraphs of a page as normalized text plus shingles.
 *
 * Script/style/nav/header/footer/noscript/aside elements are removed first.
 * Each remaining <p> is stripped of tags and named entities, whitespace is
 * collapsed, and the text is lowercased. Paragraphs with fewer than 3
 * shingles, or that look like boilerplate, are dropped.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{text: string, shingles: Set<string>}>} Kept paragraphs in document order.
 */
function extractPageParagraphs(html) {
  const chromePattern = /<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi;
  const asidePattern = /<aside\b[^>]*>[\s\S]*?<\/aside>/gi;
  const body = html.replace(chromePattern, "").replace(asidePattern, "");
  const rawParagraphs = body.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
  const kept = [];
  for (const raw of rawParagraphs) {
    const text = raw
      .replace(/<[^>]*>/g, " ")
      .replace(/&\w+;/g, " ")
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    const shingles = toShingles(text);
    if (shingles.size >= 3 && !isBoilerplateParagraph(text)) {
      kept.push({ text, shingles });
    }
  }
  return kept;
}
|
|
2593
|
+
/**
 * Split a page into heading-delimited sections, each with its content paragraphs.
 *
 * Page chrome (script/style/nav/header/footer/noscript/aside) is removed, then
 * the HTML is cut in front of every <h2>/<h3>. Text before the first heading
 * is labelled "(intro)". Paragraphs are normalized (tags and named entities
 * removed, whitespace collapsed, lowercased); those with fewer than 3 shingles
 * or that look like boilerplate are dropped, and sections left with no
 * paragraphs are omitted.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function splitIntoSectionsWithParagraphs(html) {
  const stripped = html
    .replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "")
    .replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  const toParagraph = (raw) => {
    const text = raw
      .replace(/<[^>]*>/g, " ")
      .replace(/&\w+;/g, " ")
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    return { text, shingles: toShingles(text) };
  };
  const isSubstantive = (p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text);
  // The lookahead keeps each heading tag at the start of its own chunk.
  return stripped.split(/(?=<h[23]\b[^>]*>)/i).reduce((sections, chunk) => {
    const headingMatch = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
    const rawParagraphs = chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
    const paragraphs = rawParagraphs.map((raw) => toParagraph(raw)).filter(isSubstantive);
    if (paragraphs.length > 0) sections.push({ heading, paragraphs });
    return sections;
  }, []);
}
|
|
2609
|
+
/**
 * Find pairs of sections on one page that contain near-duplicate paragraphs.
 *
 * Every unordered pair of sections is compared; a pair is reported at most
 * once, using the first paragraph combination whose shingle similarity
 * exceeds 0.4.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{headingA: string, headingB: string, similarity: number, sample: string}>}
 *   One entry per duplicated section pair; `similarity` is a whole percentage
 *   and `sample` is the first 80 characters of the matching paragraph.
 */
function findIntraPageDuplicates(html) {
  const sections = splitIntoSectionsWithParagraphs(html);
  if (sections.length < 2) return [];

  // Returns the first overlapping paragraph pair between two sections, or null.
  const firstOverlap = (left, right) => {
    for (const pLeft of left.paragraphs) {
      for (const pRight of right.paragraphs) {
        const sim = shingleSimilarity(pLeft.shingles, pRight.shingles);
        if (sim > 0.4) {
          return {
            headingA: left.heading,
            headingB: right.heading,
            similarity: Math.round(sim * 100),
            sample: pLeft.text.slice(0, 80)
          };
        }
      }
    }
    return null;
  };

  const pairs = [];
  sections.forEach((left, index) => {
    for (const right of sections.slice(index + 1)) {
      const overlap = firstOverlap(left, right);
      if (overlap !== null) pairs.push(overlap);
    }
  });
  return pairs;
}
|
|
2636
|
+
/**
 * Site-level criterion: duplicate text blocks repeated within individual pages.
 *
 * Runs the intra-page duplicate detector over the homepage and every sampled
 * blog page, then scores by the share of pages that contain at least one
 * duplicated section pair. Up to 3 pages, and 2 pairs per page, are listed as
 * detailed findings after the summary finding.
 *
 * @param {object} data - Crawl data; reads `homepage`, `blogSample` and `domain`.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkDuplicateContent(data) {
  const findings = [];
  const pages = [];
  if (data.homepage) {
    const { text, finalUrl } = data.homepage;
    pages.push({ html: text, url: finalUrl || `https://${data.domain}/` });
  }
  if (data.blogSample) {
    for (const { text, finalUrl } of data.blogSample) {
      pages.push({ html: text, url: finalUrl || "" });
    }
  }
  if (pages.length === 0) {
    findings.push({ severity: "critical", detail: "No pages available for duplicate content analysis" });
    return { criterion: "duplicate_content", criterion_label: "Duplicate Content Blocks", score: 0, status: "fail", findings, fix_priority: "P1" };
  }

  const dupDetails = pages
    .map((page) => ({ url: page.url, pairs: findIntraPageDuplicates(page.html) }))
    .filter((entry) => entry.pairs.length > 0);
  const totalDupPages = dupDetails.length;
  const totalDupPairs = dupDetails.reduce((sum, entry) => sum + entry.pairs.length, 0);
  const dupRatio = totalDupPages / pages.length;
  const percent = Math.round(dupRatio * 100);

  // Ordered from best to worst; the first tier whose condition holds wins.
  const tiers = [
    {
      applies: totalDupPairs === 0,
      score: 10,
      finding: { severity: "info", detail: `${pages.length} pages analyzed - no duplicate content blocks detected` }
    },
    {
      applies: dupRatio <= 0.05 && totalDupPairs <= 2,
      score: 9,
      finding: { severity: "info", detail: `${totalDupPairs} duplicate block pair(s) on ${totalDupPages} page(s) - minor` }
    },
    {
      applies: dupRatio <= 0.1,
      score: 7,
      finding: { severity: "low", detail: `${totalDupPairs} duplicate block pair(s) across ${totalDupPages} page(s)`, fix: "Rewrite duplicate sections to provide unique content in each" }
    },
    {
      applies: dupRatio <= 0.2,
      score: 5,
      finding: { severity: "medium", detail: `${totalDupPages} pages (${percent}%) contain duplicate content blocks`, fix: "Rewrite or remove repeated text blocks - LLMs may flag this as low-quality content" }
    },
    {
      applies: dupRatio <= 0.4,
      score: 3,
      finding: { severity: "medium", detail: `${totalDupPages} pages (${percent}%) have significant duplicate content`, fix: "Widespread duplicate blocks reduce content authority - rewrite each section with unique angles" }
    },
    {
      applies: true,
      score: 0,
      finding: { severity: "high", detail: `${totalDupPages} pages (${percent}%) contain duplicate content blocks`, fix: "Severe content duplication across the site - LLMs will likely reduce citation authority" }
    }
  ];
  const tier = tiers.find((candidate) => candidate.applies);
  const score = tier.score;
  findings.push(tier.finding);

  const examples = dupDetails.slice(0, 3).flatMap(({ url, pairs }) => {
    const shortUrl = url.slice(0, 60);
    return pairs.slice(0, 2).map((pair) => ({
      severity: "low",
      detail: `${shortUrl}: '${pair.headingA}' and '${pair.headingB}' share ${pair.similarity}% similar text ("${pair.sample}...")`,
      fix: `Rewrite one of these sections to eliminate duplicate content`
    }));
  });
  findings.push(...examples);

  const passes = score >= 7;
  let status = "fail";
  if (passes) status = "pass";
  else if (score >= 4) status = "partial";
  return { criterion: "duplicate_content", criterion_label: "Duplicate Content Blocks", score, status, findings, fix_priority: passes ? "P3" : "P1" };
}
|
|
2695
|
+
/**
 * Site-level criterion: paragraphs copied between different pages.
 *
 * Paragraphs whose fingerprint (first five shingles) appears on at least
 * max(3, 40% of pages) pages are treated as site-wide boilerplate and are not
 * used as the left-hand side of a comparison. Two pages form a duplicate pair
 * when at least 2 paragraphs of the first page have a match (shingle
 * similarity > 0.4) on the second. The score is driven by the share of pages
 * that take part in at least one duplicate pair.
 *
 * Affected pages are tracked by page index, not by display URL: the display
 * URL is truncated to 60 characters and falls back to "" when a blog page has
 * no `finalUrl`, so distinct pages could collapse into a single entry and the
 * affected ratio would be under-counted.
 *
 * @param {object} data - Crawl data; reads `homepage`, `blogSample` and `domain`.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkCrossPageDuplication(data) {
  const findings = [];
  const pages = [];
  if (data.homepage) {
    pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractPageParagraphs(data.homepage.text) });
  }
  if (data.blogSample) {
    for (const page of data.blogSample) {
      pages.push({ url: page.finalUrl || "", paragraphs: extractPageParagraphs(page.text) });
    }
  }
  if (pages.length <= 1) {
    findings.push({ severity: "info", detail: "Not enough pages to assess cross-page duplication" });
    return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score: 5, status: "partial", findings, fix_priority: "P3" };
  }

  // Cheap paragraph identity: the first five shingles joined together.
  const fingerprintOf = (paragraph) => [...paragraph.shingles].slice(0, 5).join("|");

  // Count on how many pages each fingerprint occurs (once per page at most).
  const paragraphPageCount = new Map();
  for (const page of pages) {
    const seen = new Set(page.paragraphs.map(fingerprintOf));
    for (const fp of seen) {
      paragraphPageCount.set(fp, (paragraphPageCount.get(fp) || 0) + 1);
    }
  }
  const boilerplateThreshold = Math.max(3, pages.length * 0.4);
  const siteBoilerprints = new Set();
  for (const [fp, count] of paragraphPageCount) {
    if (count >= boilerplateThreshold) siteBoilerprints.add(fp);
  }

  const crossDupPairs = [];
  const affectedPages = new Set(); // page indices, so URL collisions cannot merge pages
  for (let i = 0; i < pages.length; i++) {
    for (let j = i + 1; j < pages.length; j++) {
      let dupCount = 0;
      let sample = "";
      for (const pA of pages[i].paragraphs) {
        if (siteBoilerprints.has(fingerprintOf(pA))) continue;
        for (const pB of pages[j].paragraphs) {
          const sim = shingleSimilarity(pA.shingles, pB.shingles);
          if (sim > 0.4) {
            dupCount++;
            if (!sample) sample = pA.text.slice(0, 80);
            break; // count each paragraph of page i at most once per page pair
          }
        }
      }
      if (dupCount >= 2) {
        crossDupPairs.push({
          urlA: pages[i].url.slice(0, 60),
          urlB: pages[j].url.slice(0, 60),
          dupCount,
          sample
        });
        affectedPages.add(i);
        affectedPages.add(j);
      }
    }
  }

  const affectedCount = affectedPages.size;
  const affectedRatio = affectedCount / pages.length;
  const totalDupParagraphs = crossDupPairs.reduce((sum, pair) => sum + pair.dupCount, 0);
  let score;
  if (crossDupPairs.length === 0) {
    score = 10;
    findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no cross-page content duplication detected` });
  } else if (affectedRatio <= 0.05 && totalDupParagraphs <= 4) {
    score = 9;
    findings.push({ severity: "info", detail: `${totalDupParagraphs} shared paragraph(s) across ${affectedCount} page(s) - minor` });
  } else if (affectedRatio <= 0.1) {
    score = 7;
    findings.push({ severity: "low", detail: `${totalDupParagraphs} shared paragraphs across ${affectedCount} pages`, fix: "Rewrite shared content so each page provides a unique perspective" });
  } else if (affectedRatio <= 0.2) {
    score = 5;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate paragraphs`, fix: "Significant cross-page duplication - AI engines may only index one version" });
  } else if (affectedRatio <= 0.4) {
    score = 3;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) contain shared content blocks`, fix: "Widespread copy-paste content across pages reduces overall site authority" });
  } else {
    score = 0;
    findings.push({ severity: "high", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate content`, fix: "Severe cross-page duplication - AI engines will likely ignore redundant pages entirely" });
  }
  for (const pair of crossDupPairs.slice(0, 3)) {
    findings.push({
      severity: "low",
      detail: `${pair.dupCount} shared paragraph(s): ${pair.urlA} \u2194 ${pair.urlB} ("${pair.sample}...")`,
      fix: "Rewrite shared paragraphs so each page has unique content"
    });
  }
  return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
|
|
2561
2789
|
function auditSiteFromData(data) {
|
|
2562
2790
|
const topicCoherence = checkTopicCoherence(data);
|
|
2563
2791
|
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
@@ -2596,7 +2824,10 @@ function auditSiteFromData(data) {
|
|
|
2596
2824
|
checkEvidencePackaging(data),
|
|
2597
2825
|
checkEntityDisambiguation(data),
|
|
2598
2826
|
checkExtractionFriction(data),
|
|
2599
|
-
checkImageContextAI(data)
|
|
2827
|
+
checkImageContextAI(data),
|
|
2828
|
+
// V3 criteria (#35-#36)
|
|
2829
|
+
checkDuplicateContent(data),
|
|
2830
|
+
checkCrossPageDuplication(data)
|
|
2600
2831
|
];
|
|
2601
2832
|
}
|
|
2602
2833
|
async function auditSite(targetUrl) {
|
|
@@ -2620,11 +2851,11 @@ var WEIGHTS = {
|
|
|
2620
2851
|
// Information density per page
|
|
2621
2852
|
direct_answer_density: 0.05,
|
|
2622
2853
|
// Direct answers to queries
|
|
2623
|
-
qa_content_format: 0.
|
|
2854
|
+
qa_content_format: 0.04,
|
|
2624
2855
|
// Answer-shaped content structure
|
|
2625
|
-
query_answer_alignment: 0.
|
|
2856
|
+
query_answer_alignment: 0.04,
|
|
2626
2857
|
// Relevance to actual AI queries
|
|
2627
|
-
faq_section: 0.
|
|
2858
|
+
faq_section: 0.03,
|
|
2628
2859
|
// Structured Q&A pairs
|
|
2629
2860
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2630
2861
|
// HOW easily AI engines can extract and trust your content.
|
|
@@ -2672,8 +2903,13 @@ var WEIGHTS = {
|
|
|
2672
2903
|
// Clear entity boundaries
|
|
2673
2904
|
extraction_friction: 0.02,
|
|
2674
2905
|
// Sentence length, voice, jargon
|
|
2675
|
-
image_context_ai: 0.01
|
|
2906
|
+
image_context_ai: 0.01,
|
|
2676
2907
|
// Figure/figcaption, alt text quality
|
|
2908
|
+
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2909
|
+
duplicate_content: 0.05,
|
|
2910
|
+
// Duplicate text blocks within pages
|
|
2911
|
+
cross_page_duplication: 0.03
|
|
2912
|
+
// Same paragraphs copied across pages
|
|
2677
2913
|
};
|
|
2678
2914
|
function calculateOverallScore(criteria) {
|
|
2679
2915
|
let totalWeight = 0;
|
|
@@ -2702,7 +2938,9 @@ var PILLARS = {
|
|
|
2702
2938
|
"fact_density",
|
|
2703
2939
|
"citation_ready_writing",
|
|
2704
2940
|
"answer_first_placement",
|
|
2705
|
-
"evidence_packaging"
|
|
2941
|
+
"evidence_packaging",
|
|
2942
|
+
"duplicate_content",
|
|
2943
|
+
"cross_page_duplication"
|
|
2706
2944
|
],
|
|
2707
2945
|
"Content Structure": [
|
|
2708
2946
|
"direct_answer_density",
|
|
@@ -2767,6 +3005,8 @@ var CLIENT_NAMES = {
|
|
|
2767
3005
|
image_context_ai: "Image Context for AI",
|
|
2768
3006
|
schema_coverage: "Schema Coverage",
|
|
2769
3007
|
speakable_schema: "Speakable Schema",
|
|
3008
|
+
duplicate_content: "Duplicate Content Blocks",
|
|
3009
|
+
cross_page_duplication: "Cross-Page Duplicate Content",
|
|
2770
3010
|
content_cannibalization: "Content Cannibalization",
|
|
2771
3011
|
llms_txt: "llms.txt File",
|
|
2772
3012
|
robots_txt: "robots.txt for AI",
|
|
@@ -2784,10 +3024,12 @@ var PILLAR_WEIGHTS = {
|
|
|
2784
3024
|
citation_ready_writing: 0.04,
|
|
2785
3025
|
answer_first_placement: 0.03,
|
|
2786
3026
|
evidence_packaging: 0.03,
|
|
3027
|
+
duplicate_content: 0.05,
|
|
3028
|
+
cross_page_duplication: 0.03,
|
|
2787
3029
|
direct_answer_density: 0.05,
|
|
2788
|
-
qa_content_format: 0.
|
|
2789
|
-
query_answer_alignment: 0.
|
|
2790
|
-
faq_section: 0.
|
|
3030
|
+
qa_content_format: 0.04,
|
|
3031
|
+
query_answer_alignment: 0.04,
|
|
3032
|
+
faq_section: 0.03,
|
|
2791
3033
|
table_list_extractability: 0.03,
|
|
2792
3034
|
definition_patterns: 0.02,
|
|
2793
3035
|
entity_disambiguation: 0.02,
|
|
@@ -2820,6 +3062,8 @@ var CRITERION_EFFORT = {
|
|
|
2820
3062
|
citation_ready_writing: "Medium",
|
|
2821
3063
|
answer_first_placement: "Medium",
|
|
2822
3064
|
evidence_packaging: "Medium",
|
|
3065
|
+
duplicate_content: "Medium",
|
|
3066
|
+
cross_page_duplication: "Medium",
|
|
2823
3067
|
direct_answer_density: "Medium",
|
|
2824
3068
|
qa_content_format: "Medium",
|
|
2825
3069
|
query_answer_alignment: "Medium",
|
|
@@ -2875,6 +3119,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
2875
3119
|
image_context_ai: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
|
|
2876
3120
|
schema_coverage: "Extend structured data to inner pages (articles, services, products).",
|
|
2877
3121
|
speakable_schema: "Add SpeakableSpecification schema for voice assistant compatibility.",
|
|
3122
|
+
duplicate_content: "Rewrite duplicate text blocks so each section provides unique value.",
|
|
3123
|
+
cross_page_duplication: "Rewrite shared paragraphs across pages so each page has unique content.",
|
|
2878
3124
|
content_cannibalization: "Consolidate overlapping pages or differentiate titles and H1 headings.",
|
|
2879
3125
|
llms_txt: "Create a /llms.txt file describing your site for AI engines.",
|
|
2880
3126
|
robots_txt: "Update robots.txt to explicitly allow AI crawlers.",
|
|
@@ -2970,7 +3216,9 @@ var CRITERION_LABELS = {
|
|
|
2970
3216
|
"Evidence Packaging": "Evidence Packaging",
|
|
2971
3217
|
"Entity Disambiguation": "Entity Disambiguation",
|
|
2972
3218
|
"Extraction Friction Score": "Extraction Friction Score",
|
|
2973
|
-
"Image Context for AI": "Image Context for AI"
|
|
3219
|
+
"Image Context for AI": "Image Context for AI",
|
|
3220
|
+
"Duplicate Content Blocks": "Duplicate Content Blocks",
|
|
3221
|
+
"Cross-Page Duplicate Content": "Cross-Page Duplicate Content"
|
|
2974
3222
|
};
|
|
2975
3223
|
function scoreToStatus(score) {
|
|
2976
3224
|
if (score === 0) return "MISSING";
|
|
@@ -3065,9 +3313,9 @@ var CRITERION_WEIGHTS = {
|
|
|
3065
3313
|
content_depth: 0.07,
|
|
3066
3314
|
fact_density: 0.06,
|
|
3067
3315
|
direct_answer_density: 0.05,
|
|
3068
|
-
qa_content_format: 0.
|
|
3069
|
-
query_answer_alignment: 0.
|
|
3070
|
-
faq_section: 0.
|
|
3316
|
+
qa_content_format: 0.04,
|
|
3317
|
+
query_answer_alignment: 0.04,
|
|
3318
|
+
faq_section: 0.03,
|
|
3071
3319
|
// Content Organization (~30%)
|
|
3072
3320
|
entity_consistency: 0.05,
|
|
3073
3321
|
internal_linking: 0.04,
|
|
@@ -3096,7 +3344,10 @@ var CRITERION_WEIGHTS = {
|
|
|
3096
3344
|
evidence_packaging: 0.03,
|
|
3097
3345
|
entity_disambiguation: 0.02,
|
|
3098
3346
|
extraction_friction: 0.02,
|
|
3099
|
-
image_context_ai: 0.01
|
|
3347
|
+
image_context_ai: 0.01,
|
|
3348
|
+
// V3 Criteria
|
|
3349
|
+
duplicate_content: 0.05,
|
|
3350
|
+
cross_page_duplication: 0.03
|
|
3100
3351
|
};
|
|
3101
3352
|
var OPPORTUNITY_TEMPLATES = {
|
|
3102
3353
|
llms_txt: {
|
|
@@ -3219,6 +3470,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3219
3470
|
effort: "Medium",
|
|
3220
3471
|
description: "Ensure every question-format heading (H2/H3) is followed by a direct answer paragraph. This pattern is ideal for AI engine snippet extraction."
|
|
3221
3472
|
},
|
|
3473
|
+
duplicate_content: {
|
|
3474
|
+
name: "Fix Duplicate Content Blocks",
|
|
3475
|
+
effort: "Medium",
|
|
3476
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality or thin content, reducing citation authority. Rewrite duplicate blocks with unique angles."
|
|
3477
|
+
},
|
|
3478
|
+
cross_page_duplication: {
|
|
3479
|
+
name: "Eliminate Cross-Page Duplicate Content",
|
|
3480
|
+
effort: "Medium",
|
|
3481
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version and ignore the rest. Rewrite shared content so each page offers a unique perspective."
|
|
3482
|
+
},
|
|
3222
3483
|
content_cannibalization: {
|
|
3223
3484
|
name: "Resolve Content Cannibalization",
|
|
3224
3485
|
effort: "Medium",
|
|
@@ -3627,9 +3888,9 @@ var PAGE_CRITERIA = {
|
|
|
3627
3888
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
3628
3889
|
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
3629
3890
|
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
3630
|
-
qa_content_format: { weight: 0.
|
|
3631
|
-
query_answer_alignment: { weight: 0.
|
|
3632
|
-
faq_section: { weight: 0.
|
|
3891
|
+
qa_content_format: { weight: 0.04, label: "Q&A Content Format" },
|
|
3892
|
+
query_answer_alignment: { weight: 0.04, label: "Query-Answer Alignment" },
|
|
3893
|
+
faq_section: { weight: 0.03, label: "FAQ Section Content" },
|
|
3633
3894
|
// Content Organization
|
|
3634
3895
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3635
3896
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
@@ -3646,7 +3907,8 @@ var PAGE_CRITERIA = {
|
|
|
3646
3907
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
3647
3908
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
3648
3909
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
3649
|
-
image_context_ai: { weight: 0.01, label: "Image Context for AI" }
|
|
3910
|
+
image_context_ai: { weight: 0.01, label: "Image Context for AI" },
|
|
3911
|
+
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
3650
3912
|
};
|
|
3651
3913
|
function extractJsonLdBlocks(html) {
|
|
3652
3914
|
const blocks = [];
|
|
@@ -4095,6 +4357,90 @@ function scoreImageContextAI(html) {
|
|
|
4095
4357
|
if (contextualImages.length > 0) score += 3;
|
|
4096
4358
|
return cap(score, 10);
|
|
4097
4359
|
}
|
|
4360
|
+
// Call-to-action and legal-footer phrases used to recognise short boilerplate paragraphs.
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
/**
 * Tell whether a paragraph is boilerplate (CTA, legal footer, cookie notice).
 *
 * Short paragraphs only: fewer than 20 whitespace-separated words with a
 * CTA/legal phrase, or fewer than 30 words mentioning cookie/GDPR/consent/opt-out.
 *
 * @param {string} text - Paragraph text.
 * @returns {boolean} true when the paragraph should be excluded from duplicate analysis.
 */
function isBoilerplate(text) {
  const length = text.split(/\s+/).length;
  const consentPattern = /\b(cookie|gdpr|consent|opt.out)\b/i;
  const ctaHit = length < 20 && BOILERPLATE_PATTERNS.test(text);
  const consentHit = !ctaHit && consentPattern.test(text) && length < 30;
  return ctaHit || consentHit;
}
|
|
4367
|
+
/**
 * Page-level duplicate-content score.
 *
 * Returns the `score` field of the detailed scorer's result for the same HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The score reported by `scoreDuplicateContentDetailed`.
 */
function scoreDuplicateContent(html) {
  const { score } = scoreDuplicateContentDetailed(html);
  return score;
}
|
|
4370
|
+
/**
 * Score a single page for duplicated text between its heading sections.
 *
 * For every unordered pair of sections, each paragraph of the earlier section
 * is compared against the paragraphs of the later one; the first match with
 * shingle similarity > 0.4 records one duplicate. A paragraph can therefore
 * be recorded once per later section it matches.
 *
 * Scoring: no duplicates (or fewer than 2 sections) = 10; one duplicate = 6 if
 * it is at most 5% of all paragraphs, otherwise 4; two duplicates = 2; more = 0.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{score: number, duplicates: Array<{headingA: string, headingB: string, similarity: number, sample: string}>}}
 */
function scoreDuplicateContentDetailed(html) {
  const sections = extractSectionsWithParagraphs(html);
  if (sections.length < 2) return { score: 10, duplicates: [] };

  let totalParagraphs = 0;
  for (const section of sections) totalParagraphs += section.paragraphs.length;

  // Similarity of the first sufficiently similar paragraph in `others`, or null.
  const firstSimilarity = (candidate, others) => {
    for (const other of others) {
      const sim = shingleJaccard(candidate.shingles, other.shingles);
      if (sim > 0.4) return sim;
    }
    return null;
  };

  const duplicates = [];
  sections.forEach((left, index) => {
    for (const right of sections.slice(index + 1)) {
      for (const paragraph of left.paragraphs) {
        const sim = firstSimilarity(paragraph, right.paragraphs);
        if (sim === null) continue;
        duplicates.push({
          headingA: left.heading,
          headingB: right.heading,
          similarity: Math.round(sim * 100),
          sample: paragraph.text.slice(0, 80)
        });
      }
    }
  });

  const hits = duplicates.length;
  if (hits === 0) return { score: 10, duplicates: [] };
  const dupRatio = totalParagraphs > 0 ? hits / totalParagraphs : 0;
  let score = 0;
  if (hits === 1) {
    score = dupRatio <= 0.05 ? 6 : 4;
  } else if (hits === 2) {
    score = 2;
  }
  return { score, duplicates };
}
|
|
4409
|
+
/**
 * Break a page into <h2>/<h3> sections with their normalized content paragraphs.
 *
 * Script/style/nav/header/footer/noscript/aside elements are removed before
 * splitting. Content ahead of the first heading is labelled "(intro)". Each
 * <p> has tags and named entities replaced by spaces, whitespace collapsed,
 * and is lowercased; paragraphs with fewer than 3 four-word shingles or that
 * look like boilerplate are dropped, as are sections with nothing left.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function extractSectionsWithParagraphs(html) {
  const withoutChrome = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "");
  const body = withoutChrome.replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  // The lookahead keeps each heading tag attached to the chunk it introduces.
  const chunks = body.split(/(?=<h[23]\b[^>]*>)/i);
  return chunks.flatMap((chunk) => {
    const found = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    let heading = "(intro)";
    if (found) heading = found[1].replace(/<[^>]*>/g, "").trim();
    const paragraphs = [];
    for (const raw of chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || []) {
      const text = raw
        .replace(/<[^>]*>/g, " ")
        .replace(/&\w+;/g, " ")
        .replace(/\s+/g, " ")
        .trim()
        .toLowerCase();
      const shingles = buildShingles(text, 4);
      if (shingles.size >= 3 && !isBoilerplate(text)) paragraphs.push({ text, shingles });
    }
    return paragraphs.length > 0 ? [{ heading, paragraphs }] : [];
  });
}
|
|
4427
|
+
/**
 * Produce the set of n-word shingles for a text.
 *
 * The text is split on whitespace and tokens shorter than 2 characters are
 * discarded before the sliding window of `n` tokens is applied.
 *
 * @param {string} text - Text to shingle.
 * @param {number} n - Tokens per shingle.
 * @returns {Set<string>} Distinct shingles, tokens joined by a single space.
 */
function buildShingles(text, n) {
  const tokens = text.split(/\s+/).filter((token) => token.length > 1);
  const grams = new Set();
  // `end` is the exclusive end index of the current window.
  for (let end = n; end <= tokens.length; end++) {
    grams.add(tokens.slice(end - n, end).join(" "));
  }
  return grams;
}
|
|
4435
|
+
/**
 * Jaccard index of two shingle sets (intersection size over union size).
 *
 * @param {Set<string>} a - First shingle set.
 * @param {Set<string>} b - Second shingle set.
 * @returns {number} Similarity in [0, 1]; 0 when both sets are empty.
 */
function shingleJaccard(a, b) {
  const bothEmpty = a.size === 0 && b.size === 0;
  if (bothEmpty) return 0;
  const common = [...a].reduce((count, shingle) => count + (b.has(shingle) ? 1 : 0), 0);
  const unionSize = a.size + b.size - common;
  return unionSize === 0 ? 0 : common / unionSize;
}
|
|
4098
4444
|
var SCORING_FUNCTIONS = {
|
|
4099
4445
|
schema_markup: scoreSchemaMarkup,
|
|
4100
4446
|
qa_content_format: scoreQAFormat,
|
|
@@ -4115,7 +4461,8 @@ var SCORING_FUNCTIONS = {
|
|
|
4115
4461
|
evidence_packaging: scoreEvidencePackaging,
|
|
4116
4462
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4117
4463
|
extraction_friction: scoreExtractionFriction,
|
|
4118
|
-
image_context_ai: scoreImageContextAI
|
|
4464
|
+
image_context_ai: scoreImageContextAI,
|
|
4465
|
+
duplicate_content: scoreDuplicateContent
|
|
4119
4466
|
};
|
|
4120
4467
|
function scorePage(html, url) {
|
|
4121
4468
|
let totalWeight = 0;
|
|
@@ -4129,6 +4476,11 @@ function scorePage(html, url) {
|
|
|
4129
4476
|
totalWeight += weight;
|
|
4130
4477
|
}
|
|
4131
4478
|
let aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
|
|
4479
|
+
const dupScore = criterionScores.find((c) => c.criterion === "duplicate_content")?.score ?? 10;
|
|
4480
|
+
if (dupScore <= 6) {
|
|
4481
|
+
const dupCap = 35 + dupScore * 5;
|
|
4482
|
+
aeoScore = Math.min(aeoScore, dupCap);
|
|
4483
|
+
}
|
|
4132
4484
|
const scoreCapped = aeoScore > 75;
|
|
4133
4485
|
if (scoreCapped) aeoScore = 75;
|
|
4134
4486
|
return { aeoScore, criterionScores, scoreCapped };
|
|
@@ -4348,6 +4700,15 @@ function checkHasCitationReadyContent(html) {
|
|
|
4348
4700
|
}
|
|
4349
4701
|
return null;
|
|
4350
4702
|
}
|
|
4703
|
+
/**
 * Page issue check: report duplicated content blocks within a page.
 *
 * An issue is produced only when the detailed duplicate score is 6 or lower
 * and at least one duplicate was recorded. The label quotes the first
 * duplicate; severity is "error" for scores of 3 or lower, else "warning".
 *
 * @param {string} html - Raw page HTML.
 * @returns {{check: string, label: string, severity: string} | null} Issue, or null when none applies.
 */
function checkDuplicateContentBlocks(html) {
  const detailed = scoreDuplicateContentDetailed(html);
  const score = detailed.score;
  const duplicates = detailed.duplicates;
  if (!(score <= 6) || duplicates.length === 0) return null;

  const [first] = duplicates;
  let label;
  if (duplicates.length === 1) {
    label = `Duplicate content: '${first.headingA}' and '${first.headingB}' share ${first.similarity}% similar text ("${first.sample}...")`;
  } else {
    label = `${duplicates.length} duplicate blocks found (e.g. '${first.headingA}' and '${first.headingB}' \u2014 "${first.sample}...")`;
  }
  const severity = score <= 3 ? "error" : "warning";
  return { check: "duplicate-content", label, severity };
}
|
|
4351
4712
|
function analyzePage(html, url, category) {
|
|
4352
4713
|
const title = extractTitle(html);
|
|
4353
4714
|
const textContent = getTextContent2(html);
|
|
@@ -4366,7 +4727,8 @@ function analyzePage(html, url, category) {
|
|
|
4366
4727
|
checkImagesMissingAlt(html),
|
|
4367
4728
|
checkNoInternalLinks(html, url),
|
|
4368
4729
|
checkNoAnswerBlock(html),
|
|
4369
|
-
checkNoEvidence(html, url)
|
|
4730
|
+
checkNoEvidence(html, url),
|
|
4731
|
+
checkDuplicateContentBlocks(html)
|
|
4370
4732
|
];
|
|
4371
4733
|
for (const result of issueChecks) {
|
|
4372
4734
|
if (result) issues.push(result);
|
|
@@ -4639,9 +5001,9 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4639
5001
|
content_depth: 0.07,
|
|
4640
5002
|
fact_density: 0.06,
|
|
4641
5003
|
direct_answer_density: 0.05,
|
|
4642
|
-
qa_content_format: 0.
|
|
4643
|
-
query_answer_alignment: 0.
|
|
4644
|
-
faq_section: 0.
|
|
5004
|
+
qa_content_format: 0.04,
|
|
5005
|
+
query_answer_alignment: 0.04,
|
|
5006
|
+
faq_section: 0.03,
|
|
4645
5007
|
// Content Organization (~30%)
|
|
4646
5008
|
entity_consistency: 0.05,
|
|
4647
5009
|
internal_linking: 0.04,
|
|
@@ -4655,6 +5017,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4655
5017
|
clean_html: 0.02,
|
|
4656
5018
|
// Technical Plumbing (~15%)
|
|
4657
5019
|
content_cannibalization: 0.02,
|
|
5020
|
+
duplicate_content: 0.05,
|
|
5021
|
+
cross_page_duplication: 0.03,
|
|
4658
5022
|
llms_txt: 0.02,
|
|
4659
5023
|
robots_txt: 0.02,
|
|
4660
5024
|
content_velocity: 0.02,
|
|
@@ -4699,7 +5063,9 @@ var PHASE_CONFIG = [
|
|
|
4699
5063
|
"citation_ready_writing",
|
|
4700
5064
|
"answer_first_placement",
|
|
4701
5065
|
"evidence_packaging",
|
|
4702
|
-
"entity_disambiguation"
|
|
5066
|
+
"entity_disambiguation",
|
|
5067
|
+
"duplicate_content",
|
|
5068
|
+
"cross_page_duplication"
|
|
4703
5069
|
]
|
|
4704
5070
|
},
|
|
4705
5071
|
{
|
|
@@ -5579,6 +5945,66 @@ Summarization: yes`,
|
|
|
5579
5945
|
}
|
|
5580
5946
|
return fixes;
|
|
5581
5947
|
},
|
|
5948
|
+
duplicate_content: (c, pages) => {
|
|
5949
|
+
if (c.score >= 10) return [];
|
|
5950
|
+
const impact = impactFromScore(c.score);
|
|
5951
|
+
const effort = effortForCriterion("duplicate_content", c.score);
|
|
5952
|
+
const affected = getAffectedPages("duplicate_content", pages);
|
|
5953
|
+
const sectionPairs = c.findings.filter((f) => f.detail.includes("' and '")).map((f) => {
|
|
5954
|
+
const match = f.detail.match(/'([^']+)' and '([^']+)'/);
|
|
5955
|
+
return match ? { a: match[1], b: match[2] } : null;
|
|
5956
|
+
}).filter(Boolean);
|
|
5957
|
+
const steps = [
|
|
5958
|
+
"Identify sections with duplicate or near-identical text",
|
|
5959
|
+
"Rewrite each section to provide a unique angle on the topic",
|
|
5960
|
+
"Ensure each heading section adds new information for the reader"
|
|
5961
|
+
];
|
|
5962
|
+
if (sectionPairs.length > 0) {
|
|
5963
|
+
const pair = sectionPairs[0];
|
|
5964
|
+
steps.unshift(`Start with '${pair.a}' and '${pair.b}' which share similar text`);
|
|
5965
|
+
}
|
|
5966
|
+
return [{
|
|
5967
|
+
id: "fix-duplicate-content",
|
|
5968
|
+
criterion: c.criterion_label,
|
|
5969
|
+
criterionId: c.criterion,
|
|
5970
|
+
title: "Fix duplicate content blocks",
|
|
5971
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality content, reducing the authority of the page.",
|
|
5972
|
+
impact,
|
|
5973
|
+
effort,
|
|
5974
|
+
impactScore: 0,
|
|
5975
|
+
category: "content",
|
|
5976
|
+
steps,
|
|
5977
|
+
successCriteria: "Each section within a page provides unique content",
|
|
5978
|
+
affectedPages: affected,
|
|
5979
|
+
pageCount: affected?.length
|
|
5980
|
+
}];
|
|
5981
|
+
},
|
|
5982
|
+
cross_page_duplication: (c, pages) => {
|
|
5983
|
+
if (c.score >= 10) return [];
|
|
5984
|
+
const impact = impactFromScore(c.score);
|
|
5985
|
+
const effort = effortForCriterion("cross_page_duplication", c.score);
|
|
5986
|
+
const affected = getAffectedPages("cross_page_duplication", pages);
|
|
5987
|
+
return [{
|
|
5988
|
+
id: "fix-cross-page-duplication",
|
|
5989
|
+
criterion: c.criterion_label,
|
|
5990
|
+
criterionId: c.criterion,
|
|
5991
|
+
title: "Eliminate cross-page duplicate content",
|
|
5992
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version, wasting the others.",
|
|
5993
|
+
impact,
|
|
5994
|
+
effort,
|
|
5995
|
+
impactScore: 0,
|
|
5996
|
+
category: "content",
|
|
5997
|
+
steps: [
|
|
5998
|
+
"Identify paragraphs that are copy-pasted across multiple pages",
|
|
5999
|
+
"Rewrite each instance to provide a unique angle relevant to that page",
|
|
6000
|
+
"Move truly shared content to a single resource page and link to it",
|
|
6001
|
+
"Use canonical tags if pages must share content"
|
|
6002
|
+
],
|
|
6003
|
+
successCriteria: "Each page has unique body content with no copy-pasted paragraphs",
|
|
6004
|
+
affectedPages: affected,
|
|
6005
|
+
pageCount: affected?.length
|
|
6006
|
+
}];
|
|
6007
|
+
},
|
|
5582
6008
|
visible_date_signal: (c, pages) => {
|
|
5583
6009
|
if (c.score >= 10) return [];
|
|
5584
6010
|
const impact = impactFromScore(c.score);
|