aeorank 3.0.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -23
- package/dist/browser.d.ts +3 -3
- package/dist/browser.js +452 -26
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-OCLAIHX6.js → chunk-RYV25AUV.js} +4 -2
- package/dist/chunk-RYV25AUV.js.map +1 -0
- package/dist/cli.js +387 -21
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-5AYKCZQY.js → full-site-crawler-OBECS7AT.js} +4 -2
- package/dist/full-site-crawler-OBECS7AT.js.map +1 -0
- package/dist/{full-site-crawler-BCJS67WQ.js → full-site-crawler-TQ35TB2X.js} +2 -2
- package/dist/index.cjs +454 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +452 -26
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-OCLAIHX6.js.map +0 -1
- package/dist/full-site-crawler-5AYKCZQY.js.map +0 -1
- /package/dist/{full-site-crawler-BCJS67WQ.js.map → full-site-crawler-TQ35TB2X.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
extractAllUrlsFromSitemap,
|
|
4
4
|
extractInternalLinks,
|
|
5
5
|
inferCategory
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-RYV25AUV.js";
|
|
7
7
|
|
|
8
8
|
// src/parked-domain.ts
|
|
9
9
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
@@ -2558,6 +2558,234 @@ function checkImageContextAI(data) {
|
|
|
2558
2558
|
}
|
|
2559
2559
|
return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
|
|
2560
2560
|
}
|
|
2561
|
+
// Call-to-action and legal phrases that mark a short paragraph as site chrome
// rather than editorial content.
var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;

/**
 * Decide whether a paragraph should be ignored by duplicate detection because
 * it looks like boilerplate (CTA, legal footer text, consent banner).
 *
 * A paragraph is boilerplate when it is short (< 20 whitespace-separated
 * words) and contains a CTA/legal phrase, or when it is fairly short
 * (< 30 words) and mentions consent vocabulary.
 *
 * @param {string} text - Paragraph text (callers in this file pass trimmed, lower-cased text).
 * @returns {boolean} true when the paragraph should be skipped.
 */
function isBoilerplateParagraph(text) {
  const words = text.split(/\s+/).length;
  if (words < 20 && BOILERPLATE_RE.test(text)) return true;
  // `cookies?` also matches the plural: with the trailing \b, the bare word
  // "cookie" never matched "we use cookies ...", the most common banner text.
  // `opt.out` deliberately keeps the wildcard so "opt-out" and "opt out" both match.
  if (words < 30 && /\b(cookies?|gdpr|consent|opt.out)\b/i.test(text)) return true;
  return false;
}
|
|
2568
|
+
/**
 * Build the set of word n-grams ("shingles") for a piece of text.
 *
 * Tokens are produced by splitting on whitespace; tokens of length 0 or 1 are
 * discarded before windows are formed.
 *
 * @param {string} text - Text to shingle.
 * @param {number} [n=4] - Number of consecutive tokens per shingle.
 * @returns {Set<string>} Distinct shingles, each joined with single spaces.
 */
function toShingles(text, n = 4) {
  const tokens = text.split(/\s+/).filter((token) => token.length > 1);
  const lastStart = tokens.length - n;
  const result = /* @__PURE__ */ new Set();
  let start = 0;
  while (start <= lastStart) {
    result.add(tokens.slice(start, start + n).join(" "));
    start += 1;
  }
  return result;
}
|
|
2576
|
+
/**
 * Jaccard similarity of two shingle sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function shingleSimilarity(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  let shared = 0;
  a.forEach((shingle) => {
    if (b.has(shingle)) shared += 1;
  });
  const combined = a.size + b.size - shared;
  if (combined === 0) return 0;
  return shared / combined;
}
|
|
2585
|
+
/**
 * Extract the content paragraphs of a page for cross-page comparison.
 *
 * Steps: drop script/style/nav/header/footer/noscript/aside blocks, collect
 * every <p> element, reduce each one to lower-cased plain text, shingle it,
 * then discard paragraphs with fewer than 3 shingles or that look like
 * boilerplate.
 *
 * @param {string} html - Raw page HTML. Non-string or empty input yields [].
 * @returns {Array<{text: string, shingles: Set<string>}>}
 */
function extractPageParagraphs(html) {
  // A page without a body must not abort the whole audit.
  if (typeof html !== "string" || html.length === 0) return [];
  const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  const pMatches = cleaned.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
  return pMatches.map((p) => {
    // Strip named AND numeric character references (&amp; &#39; &#x2019;);
    // the previous /&\w+;/ pattern left numeric ones embedded in the tokens.
    const text = p.replace(/<[^>]*>/g, " ").replace(/&(?:#\d+|#x[0-9a-f]+|\w+);/gi, " ").replace(/\s+/g, " ").trim().toLowerCase();
    return { text, shingles: toShingles(text) };
  }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
}
|
|
2593
|
+
/**
 * Split a page into heading-delimited sections with their content paragraphs.
 *
 * The HTML is cut in front of every <h2>/<h3>. Each part becomes a section
 * whose heading is the tag-stripped heading text ("(intro)" for content that
 * precedes the first heading). Paragraphs are normalised to lower-cased plain
 * text and shingled; paragraphs with fewer than 3 shingles or that look like
 * boilerplate are dropped, and sections left without paragraphs are omitted.
 *
 * @param {string} html - Raw page HTML. Non-string or empty input yields [].
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function splitIntoSectionsWithParagraphs(html) {
  // A page without a body must not abort the whole audit.
  if (typeof html !== "string" || html.length === 0) return [];
  const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  // The lookahead keeps each heading at the start of its own part.
  const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
  const sections = [];
  for (const part of parts) {
    const hMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = hMatch ? hMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
    const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
    const paragraphs = pMatches.map((p) => {
      // Strip named AND numeric character references (&amp; &#39; &#x2019;);
      // the previous /&\w+;/ pattern left numeric ones embedded in the tokens.
      const text = p.replace(/<[^>]*>/g, " ").replace(/&(?:#\d+|#x[0-9a-f]+|\w+);/gi, " ").replace(/\s+/g, " ").trim().toLowerCase();
      return { text, shingles: toShingles(text) };
    }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
    if (paragraphs.length > 0) sections.push({ heading, paragraphs });
  }
  return sections;
}
|
|
2609
|
+
/**
 * Find pairs of sections on one page that share near-duplicate paragraphs.
 *
 * Every unordered pair of sections is examined once. For a pair, paragraphs
 * are compared in document order and only the first match whose shingle
 * similarity exceeds 0.4 is recorded, so each section pair contributes at
 * most one entry.
 *
 * @param {string} html - Raw page HTML, passed to splitIntoSectionsWithParagraphs.
 * @returns {Array<{headingA: string, headingB: string, similarity: number, sample: string}>}
 *   similarity is a rounded percentage; sample is the first 80 characters of
 *   the matching paragraph from the earlier section.
 */
function findIntraPageDuplicates(html) {
  const sections = splitIntoSectionsWithParagraphs(html);
  const pairs = [];
  if (sections.length < 2) return pairs;
  sections.forEach((earlier, index) => {
    for (const later of sections.slice(index + 1)) {
      // Nested `some` stops at the first paragraph pair above the threshold.
      earlier.paragraphs.some((pA) => later.paragraphs.some((pB) => {
        const sim = shingleSimilarity(pA.shingles, pB.shingles);
        if (!(sim > 0.4)) return false;
        pairs.push({
          headingA: earlier.heading,
          headingB: later.heading,
          similarity: Math.round(sim * 100),
          sample: pA.text.slice(0, 80)
        });
        return true;
      }));
    }
  });
  return pairs;
}
|
|
2636
|
+
/**
 * Site-level criterion "duplicate_content": how many sampled pages repeat
 * near-identical paragraphs across their own sections.
 *
 * Pages considered are data.homepage plus every entry of data.blogSample.
 * The score is driven by the share of pages with at least one duplicate
 * section pair (and, for the top tier, by the total number of pairs). Up to
 * three pages and two pairs per page are listed as detail findings.
 *
 * @param {object} data - Crawl data; reads homepage, blogSample and domain.
 *   The `text` field of each page is handed to findIntraPageDuplicates
 *   (presumably raw HTML — confirm against the crawler).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkDuplicateContent(data) {
  const criterion = "duplicate_content";
  const criterion_label = "Duplicate Content Blocks";
  const findings = [];

  const pages = [];
  if (data.homepage) {
    pages.push({ html: data.homepage.text, url: data.homepage.finalUrl || `https://${data.domain}/` });
  }
  if (data.blogSample) {
    for (const post of data.blogSample) {
      pages.push({ html: post.text, url: post.finalUrl || "" });
    }
  }
  if (pages.length === 0) {
    findings.push({ severity: "critical", detail: "No pages available for duplicate content analysis" });
    return { criterion, criterion_label, score: 0, status: "fail", findings, fix_priority: "P1" };
  }

  // One entry per page that has at least one duplicate section pair.
  const dupDetails = pages
    .map((page) => ({ url: page.url, pairs: findIntraPageDuplicates(page.html) }))
    .filter((entry) => entry.pairs.length > 0);
  const totalDupPages = dupDetails.length;
  const totalDupPairs = dupDetails.reduce((sum, entry) => sum + entry.pairs.length, 0);
  const dupRatio = totalDupPages / pages.length;

  let score;
  if (totalDupPairs === 0) {
    score = 10;
    findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no duplicate content blocks detected` });
  } else if (dupRatio <= 0.05 && totalDupPairs <= 2) {
    score = 9;
    findings.push({ severity: "info", detail: `${totalDupPairs} duplicate block pair(s) on ${totalDupPages} page(s) - minor` });
  } else if (dupRatio <= 0.1) {
    score = 7;
    findings.push({ severity: "low", detail: `${totalDupPairs} duplicate block pair(s) across ${totalDupPages} page(s)`, fix: "Rewrite duplicate sections to provide unique content in each" });
  } else if (dupRatio <= 0.2) {
    score = 5;
    findings.push({ severity: "medium", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) contain duplicate content blocks`, fix: "Rewrite or remove repeated text blocks - LLMs may flag this as low-quality content" });
  } else if (dupRatio <= 0.4) {
    score = 3;
    findings.push({ severity: "medium", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) have significant duplicate content`, fix: "Widespread duplicate blocks reduce content authority - rewrite each section with unique angles" });
  } else {
    score = 0;
    findings.push({ severity: "high", detail: `${totalDupPages} pages (${Math.round(dupRatio * 100)}%) contain duplicate content blocks`, fix: "Severe content duplication across the site - LLMs will likely reduce citation authority" });
  }

  // Concrete examples: at most 3 pages, at most 2 section pairs per page.
  dupDetails.slice(0, 3).forEach(({ url, pairs }) => {
    const shortUrl = url.slice(0, 60);
    pairs.slice(0, 2).forEach((pair) => {
      findings.push({
        severity: "low",
        detail: `${shortUrl}: '${pair.headingA}' and '${pair.headingB}' share ${pair.similarity}% similar text ("${pair.sample}...")`,
        fix: `Rewrite one of these sections to eliminate duplicate content`
      });
    });
  });

  const passed = score >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (score >= 4) status = "partial";
  return { criterion, criterion_label, score, status, findings, fix_priority: passed ? "P3" : "P1" };
}
|
|
2695
|
+
/**
 * Site-level criterion "cross_page_duplication": paragraphs that are copied
 * between different sampled pages.
 *
 * Pages considered are data.homepage plus every entry of data.blogSample.
 * A paragraph fingerprint (its first five shingles) that occurs on at least
 * max(3, 40% of pages) pages is treated as site-wide boilerplate and is not
 * used as the left-hand side of a comparison. Two pages form a duplicate pair
 * when at least two paragraphs of the first page each have a counterpart on
 * the second page with shingle similarity above 0.4.
 *
 * Affected pages are counted by page index. URLs are shortened to 60
 * characters for display only; counting by the shortened (or missing) URL
 * merged distinct pages and under-reported the affected ratio.
 *
 * @param {object} data - Crawl data; reads homepage, blogSample and domain.
 *   The `text` field of each page is handed to extractPageParagraphs
 *   (presumably raw HTML — confirm against the crawler).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkCrossPageDuplication(data) {
  const findings = [];
  // Computed once per paragraph instead of once per page-pair comparison.
  const fingerprintOf = (paragraph) => [...paragraph.shingles].slice(0, 5).join("|");
  const buildPage = (url, html) => ({
    url,
    paragraphs: extractPageParagraphs(html).map((p) => ({ text: p.text, shingles: p.shingles, fp: fingerprintOf(p) }))
  });

  const pages = [];
  if (data.homepage) {
    pages.push(buildPage(data.homepage.finalUrl || `https://${data.domain}/`, data.homepage.text));
  }
  if (data.blogSample) {
    for (const page of data.blogSample) {
      pages.push(buildPage(page.finalUrl || "", page.text));
    }
  }
  if (pages.length <= 1) {
    findings.push({ severity: "info", detail: "Not enough pages to assess cross-page duplication" });
    return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score: 5, status: "partial", findings, fix_priority: "P3" };
  }

  // Number of pages each fingerprint appears on (counted once per page).
  const paragraphPageCount = /* @__PURE__ */ new Map();
  for (const page of pages) {
    for (const fp of new Set(page.paragraphs.map((p) => p.fp))) {
      paragraphPageCount.set(fp, (paragraphPageCount.get(fp) || 0) + 1);
    }
  }
  const boilerplateThreshold = Math.max(3, pages.length * 0.4);
  const siteBoilerprints = /* @__PURE__ */ new Set();
  for (const [fp, count] of paragraphPageCount) {
    if (count >= boilerplateThreshold) siteBoilerprints.add(fp);
  }

  const crossDupPairs = [];
  const affectedPages = /* @__PURE__ */ new Set();
  for (let i = 0; i < pages.length; i++) {
    for (let j = i + 1; j < pages.length; j++) {
      let dupCount = 0;
      let sample = "";
      for (const pA of pages[i].paragraphs) {
        if (siteBoilerprints.has(pA.fp)) continue;
        for (const pB of pages[j].paragraphs) {
          if (shingleSimilarity(pA.shingles, pB.shingles) > 0.4) {
            dupCount++;
            if (!sample) sample = pA.text.slice(0, 80);
            break;
          }
        }
      }
      // A single shared paragraph is tolerated; two or more flags the pair.
      if (dupCount >= 2) {
        crossDupPairs.push({
          urlA: pages[i].url.slice(0, 60),
          urlB: pages[j].url.slice(0, 60),
          dupCount,
          sample
        });
        affectedPages.add(i);
        affectedPages.add(j);
      }
    }
  }

  const affectedCount = affectedPages.size;
  const affectedRatio = affectedCount / pages.length;
  const totalDupParagraphs = crossDupPairs.reduce((s, p) => s + p.dupCount, 0);
  let score;
  if (crossDupPairs.length === 0) {
    score = 10;
    findings.push({ severity: "info", detail: `${pages.length} pages analyzed - no cross-page content duplication detected` });
  } else if (affectedRatio <= 0.05 && totalDupParagraphs <= 4) {
    score = 9;
    findings.push({ severity: "info", detail: `${totalDupParagraphs} shared paragraph(s) across ${affectedCount} page(s) - minor` });
  } else if (affectedRatio <= 0.1) {
    score = 7;
    findings.push({ severity: "low", detail: `${totalDupParagraphs} shared paragraphs across ${affectedCount} pages`, fix: "Rewrite shared content so each page provides a unique perspective" });
  } else if (affectedRatio <= 0.2) {
    score = 5;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate paragraphs`, fix: "Significant cross-page duplication - AI engines may only index one version" });
  } else if (affectedRatio <= 0.4) {
    score = 3;
    findings.push({ severity: "medium", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) contain shared content blocks`, fix: "Widespread copy-paste content across pages reduces overall site authority" });
  } else {
    score = 0;
    findings.push({ severity: "high", detail: `${affectedCount} pages (${Math.round(affectedRatio * 100)}%) share duplicate content`, fix: "Severe cross-page duplication - AI engines will likely ignore redundant pages entirely" });
  }
  for (const pair of crossDupPairs.slice(0, 3)) {
    findings.push({
      severity: "low",
      detail: `${pair.dupCount} shared paragraph(s): ${pair.urlA} \u2194 ${pair.urlB} ("${pair.sample}...")`,
      fix: "Rewrite shared paragraphs so each page has unique content"
    });
  }
  return { criterion: "cross_page_duplication", criterion_label: "Cross-Page Duplicate Content", score, status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
|
|
2561
2789
|
function auditSiteFromData(data) {
|
|
2562
2790
|
const topicCoherence = checkTopicCoherence(data);
|
|
2563
2791
|
const cannibalization = checkContentCannibalization(data, topicCoherence.score);
|
|
@@ -2596,7 +2824,10 @@ function auditSiteFromData(data) {
|
|
|
2596
2824
|
checkEvidencePackaging(data),
|
|
2597
2825
|
checkEntityDisambiguation(data),
|
|
2598
2826
|
checkExtractionFriction(data),
|
|
2599
|
-
checkImageContextAI(data)
|
|
2827
|
+
checkImageContextAI(data),
|
|
2828
|
+
// V3 criteria (#35-#36)
|
|
2829
|
+
checkDuplicateContent(data),
|
|
2830
|
+
checkCrossPageDuplication(data)
|
|
2600
2831
|
];
|
|
2601
2832
|
}
|
|
2602
2833
|
async function auditSite(targetUrl) {
|
|
@@ -2620,11 +2851,11 @@ var WEIGHTS = {
|
|
|
2620
2851
|
// Information density per page
|
|
2621
2852
|
direct_answer_density: 0.05,
|
|
2622
2853
|
// Direct answers to queries
|
|
2623
|
-
qa_content_format: 0.
|
|
2854
|
+
qa_content_format: 0.04,
|
|
2624
2855
|
// Answer-shaped content structure
|
|
2625
|
-
query_answer_alignment: 0.
|
|
2856
|
+
query_answer_alignment: 0.04,
|
|
2626
2857
|
// Relevance to actual AI queries
|
|
2627
|
-
faq_section: 0.
|
|
2858
|
+
faq_section: 0.03,
|
|
2628
2859
|
// Structured Q&A pairs
|
|
2629
2860
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2630
2861
|
// HOW easily AI engines can extract and trust your content.
|
|
@@ -2672,8 +2903,13 @@ var WEIGHTS = {
|
|
|
2672
2903
|
// Clear entity boundaries
|
|
2673
2904
|
extraction_friction: 0.02,
|
|
2674
2905
|
// Sentence length, voice, jargon
|
|
2675
|
-
image_context_ai: 0.01
|
|
2906
|
+
image_context_ai: 0.01,
|
|
2676
2907
|
// Figure/figcaption, alt text quality
|
|
2908
|
+
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2909
|
+
duplicate_content: 0.05,
|
|
2910
|
+
// Duplicate text blocks within pages
|
|
2911
|
+
cross_page_duplication: 0.03
|
|
2912
|
+
// Same paragraphs copied across pages
|
|
2677
2913
|
};
|
|
2678
2914
|
function calculateOverallScore(criteria) {
|
|
2679
2915
|
let totalWeight = 0;
|
|
@@ -2803,7 +3039,9 @@ var PILLARS = {
|
|
|
2803
3039
|
"fact_density",
|
|
2804
3040
|
"citation_ready_writing",
|
|
2805
3041
|
"answer_first_placement",
|
|
2806
|
-
"evidence_packaging"
|
|
3042
|
+
"evidence_packaging",
|
|
3043
|
+
"duplicate_content",
|
|
3044
|
+
"cross_page_duplication"
|
|
2807
3045
|
],
|
|
2808
3046
|
"Content Structure": [
|
|
2809
3047
|
"direct_answer_density",
|
|
@@ -2868,6 +3106,8 @@ var CLIENT_NAMES = {
|
|
|
2868
3106
|
image_context_ai: "Image Context for AI",
|
|
2869
3107
|
schema_coverage: "Schema Coverage",
|
|
2870
3108
|
speakable_schema: "Speakable Schema",
|
|
3109
|
+
duplicate_content: "Duplicate Content Blocks",
|
|
3110
|
+
cross_page_duplication: "Cross-Page Duplicate Content",
|
|
2871
3111
|
content_cannibalization: "Content Cannibalization",
|
|
2872
3112
|
llms_txt: "llms.txt File",
|
|
2873
3113
|
robots_txt: "robots.txt for AI",
|
|
@@ -2885,10 +3125,12 @@ var PILLAR_WEIGHTS = {
|
|
|
2885
3125
|
citation_ready_writing: 0.04,
|
|
2886
3126
|
answer_first_placement: 0.03,
|
|
2887
3127
|
evidence_packaging: 0.03,
|
|
3128
|
+
duplicate_content: 0.05,
|
|
3129
|
+
cross_page_duplication: 0.03,
|
|
2888
3130
|
direct_answer_density: 0.05,
|
|
2889
|
-
qa_content_format: 0.
|
|
2890
|
-
query_answer_alignment: 0.
|
|
2891
|
-
faq_section: 0.
|
|
3131
|
+
qa_content_format: 0.04,
|
|
3132
|
+
query_answer_alignment: 0.04,
|
|
3133
|
+
faq_section: 0.03,
|
|
2892
3134
|
table_list_extractability: 0.03,
|
|
2893
3135
|
definition_patterns: 0.02,
|
|
2894
3136
|
entity_disambiguation: 0.02,
|
|
@@ -2921,6 +3163,8 @@ var CRITERION_EFFORT = {
|
|
|
2921
3163
|
citation_ready_writing: "Medium",
|
|
2922
3164
|
answer_first_placement: "Medium",
|
|
2923
3165
|
evidence_packaging: "Medium",
|
|
3166
|
+
duplicate_content: "Medium",
|
|
3167
|
+
cross_page_duplication: "Medium",
|
|
2924
3168
|
direct_answer_density: "Medium",
|
|
2925
3169
|
qa_content_format: "Medium",
|
|
2926
3170
|
query_answer_alignment: "Medium",
|
|
@@ -2976,6 +3220,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
2976
3220
|
image_context_ai: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
|
|
2977
3221
|
schema_coverage: "Extend structured data to inner pages (articles, services, products).",
|
|
2978
3222
|
speakable_schema: "Add SpeakableSpecification schema for voice assistant compatibility.",
|
|
3223
|
+
duplicate_content: "Rewrite duplicate text blocks so each section provides unique value.",
|
|
3224
|
+
cross_page_duplication: "Rewrite shared paragraphs across pages so each page has unique content.",
|
|
2979
3225
|
content_cannibalization: "Consolidate overlapping pages or differentiate titles and H1 headings.",
|
|
2980
3226
|
llms_txt: "Create a /llms.txt file describing your site for AI engines.",
|
|
2981
3227
|
robots_txt: "Update robots.txt to explicitly allow AI crawlers.",
|
|
@@ -3071,7 +3317,9 @@ var CRITERION_LABELS = {
|
|
|
3071
3317
|
"Evidence Packaging": "Evidence Packaging",
|
|
3072
3318
|
"Entity Disambiguation": "Entity Disambiguation",
|
|
3073
3319
|
"Extraction Friction Score": "Extraction Friction Score",
|
|
3074
|
-
"Image Context for AI": "Image Context for AI"
|
|
3320
|
+
"Image Context for AI": "Image Context for AI",
|
|
3321
|
+
"Duplicate Content Blocks": "Duplicate Content Blocks",
|
|
3322
|
+
"Cross-Page Duplicate Content": "Cross-Page Duplicate Content"
|
|
3075
3323
|
};
|
|
3076
3324
|
function scoreToStatus(score) {
|
|
3077
3325
|
if (score === 0) return "MISSING";
|
|
@@ -3166,9 +3414,9 @@ var CRITERION_WEIGHTS = {
|
|
|
3166
3414
|
content_depth: 0.07,
|
|
3167
3415
|
fact_density: 0.06,
|
|
3168
3416
|
direct_answer_density: 0.05,
|
|
3169
|
-
qa_content_format: 0.
|
|
3170
|
-
query_answer_alignment: 0.
|
|
3171
|
-
faq_section: 0.
|
|
3417
|
+
qa_content_format: 0.04,
|
|
3418
|
+
query_answer_alignment: 0.04,
|
|
3419
|
+
faq_section: 0.03,
|
|
3172
3420
|
// Content Organization (~30%)
|
|
3173
3421
|
entity_consistency: 0.05,
|
|
3174
3422
|
internal_linking: 0.04,
|
|
@@ -3197,7 +3445,10 @@ var CRITERION_WEIGHTS = {
|
|
|
3197
3445
|
evidence_packaging: 0.03,
|
|
3198
3446
|
entity_disambiguation: 0.02,
|
|
3199
3447
|
extraction_friction: 0.02,
|
|
3200
|
-
image_context_ai: 0.01
|
|
3448
|
+
image_context_ai: 0.01,
|
|
3449
|
+
// V3 Criteria
|
|
3450
|
+
duplicate_content: 0.05,
|
|
3451
|
+
cross_page_duplication: 0.03
|
|
3201
3452
|
};
|
|
3202
3453
|
var OPPORTUNITY_TEMPLATES = {
|
|
3203
3454
|
llms_txt: {
|
|
@@ -3320,6 +3571,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3320
3571
|
effort: "Medium",
|
|
3321
3572
|
description: "Ensure every question-format heading (H2/H3) is followed by a direct answer paragraph. This pattern is ideal for AI engine snippet extraction."
|
|
3322
3573
|
},
|
|
3574
|
+
duplicate_content: {
|
|
3575
|
+
name: "Fix Duplicate Content Blocks",
|
|
3576
|
+
effort: "Medium",
|
|
3577
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality or thin content, reducing citation authority. Rewrite duplicate blocks with unique angles."
|
|
3578
|
+
},
|
|
3579
|
+
cross_page_duplication: {
|
|
3580
|
+
name: "Eliminate Cross-Page Duplicate Content",
|
|
3581
|
+
effort: "Medium",
|
|
3582
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version and ignore the rest. Rewrite shared content so each page offers a unique perspective."
|
|
3583
|
+
},
|
|
3323
3584
|
content_cannibalization: {
|
|
3324
3585
|
name: "Resolve Content Cannibalization",
|
|
3325
3586
|
effort: "Medium",
|
|
@@ -3728,9 +3989,9 @@ var PAGE_CRITERIA = {
|
|
|
3728
3989
|
original_data: { weight: 0.1, label: "Original Data & Expert Content" },
|
|
3729
3990
|
fact_density: { weight: 0.06, label: "Fact & Data Density" },
|
|
3730
3991
|
direct_answer_density: { weight: 0.05, label: "Direct Answer Paragraphs" },
|
|
3731
|
-
qa_content_format: { weight: 0.
|
|
3732
|
-
query_answer_alignment: { weight: 0.
|
|
3733
|
-
faq_section: { weight: 0.
|
|
3992
|
+
qa_content_format: { weight: 0.04, label: "Q&A Content Format" },
|
|
3993
|
+
query_answer_alignment: { weight: 0.04, label: "Query-Answer Alignment" },
|
|
3994
|
+
faq_section: { weight: 0.03, label: "FAQ Section Content" },
|
|
3734
3995
|
// Content Organization
|
|
3735
3996
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3736
3997
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
@@ -3747,7 +4008,8 @@ var PAGE_CRITERIA = {
|
|
|
3747
4008
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
3748
4009
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
3749
4010
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
3750
|
-
image_context_ai: { weight: 0.01, label: "Image Context for AI" }
|
|
4011
|
+
image_context_ai: { weight: 0.01, label: "Image Context for AI" },
|
|
4012
|
+
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
3751
4013
|
};
|
|
3752
4014
|
function extractJsonLdBlocks(html) {
|
|
3753
4015
|
const blocks = [];
|
|
@@ -4196,6 +4458,90 @@ function scoreImageContextAI(html) {
|
|
|
4196
4458
|
if (contextualImages.length > 0) score += 3;
|
|
4197
4459
|
return cap(score, 10);
|
|
4198
4460
|
}
|
|
4461
|
+
// Call-to-action and legal phrases that mark a short paragraph as site chrome
// rather than editorial content.
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;

/**
 * Decide whether a paragraph should be ignored by page-level duplicate
 * scoring because it looks like boilerplate (CTA, legal text, consent banner).
 *
 * A paragraph is boilerplate when it is short (< 20 whitespace-separated
 * words) and contains a CTA/legal phrase, or when it is fairly short
 * (< 30 words) and mentions consent vocabulary.
 *
 * @param {string} text - Paragraph text (callers in this file pass trimmed, lower-cased text).
 * @returns {boolean} true when the paragraph should be skipped.
 */
function isBoilerplate(text) {
  const words = text.split(/\s+/).length;
  if (words < 20 && BOILERPLATE_PATTERNS.test(text)) return true;
  // `cookies?` also matches the plural: with the trailing \b, the bare word
  // "cookie" never matched "we use cookies ...", the most common banner text.
  // `opt.out` deliberately keeps the wildcard so "opt-out" and "opt out" both match.
  if (words < 30 && /\b(cookies?|gdpr|consent|opt.out)\b/i.test(text)) return true;
  return false;
}
|
|
4468
|
+
/**
 * Page-level score (0-10) for duplicated content blocks.
 * Thin wrapper that discards the duplicate details.
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The `score` field of scoreDuplicateContentDetailed(html).
 */
function scoreDuplicateContent(html) {
  const { score } = scoreDuplicateContentDetailed(html);
  return score;
}
|
|
4471
|
+
/**
 * Page-level duplicate-content score together with the matching blocks.
 *
 * Every unordered pair of sections is compared. Each paragraph of the
 * earlier section that has a counterpart in the later section (shingle
 * Jaccard > 0.4) adds one duplicate; only its first counterpart is recorded.
 * A paragraph repeated in several later sections is therefore counted once
 * per section pair.
 *
 * Scoring: no duplicates → 10; one duplicate → 6 when it is at most 5% of
 * all paragraphs, otherwise 4; two duplicates → 2; more → 0.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{score: number, duplicates: Array<{headingA: string, headingB: string, similarity: number, sample: string}>}}
 */
function scoreDuplicateContentDetailed(html) {
  const sections = extractSectionsWithParagraphs(html);
  if (sections.length < 2) return { score: 10, duplicates: [] };

  let totalParagraphs = 0;
  for (const section of sections) totalParagraphs += section.paragraphs.length;

  const duplicates = [];
  sections.forEach((earlier, index) => {
    sections.slice(index + 1).forEach((later) => {
      earlier.paragraphs.forEach((pA) => {
        // `some` stops at the first counterpart, so pA is counted once per section pair.
        later.paragraphs.some((pB) => {
          const sim = shingleJaccard(pA.shingles, pB.shingles);
          if (!(sim > 0.4)) return false;
          duplicates.push({
            headingA: earlier.heading,
            headingB: later.heading,
            similarity: Math.round(sim * 100),
            sample: pA.text.slice(0, 80)
          });
          return true;
        });
      });
    });
  });

  // Exactly one entry is recorded per counted duplicate paragraph.
  const dupParagraphCount = duplicates.length;
  if (dupParagraphCount === 0) return { score: 10, duplicates: [] };

  const dupRatio = totalParagraphs > 0 ? dupParagraphCount / totalParagraphs : 0;
  let score = 0;
  switch (dupParagraphCount) {
    case 1:
      score = dupRatio <= 0.05 ? 6 : 4;
      break;
    case 2:
      score = 2;
      break;
    default:
      score = 0;
  }
  return { score, duplicates };
}
|
|
4510
|
+
/**
 * Split a page into heading-delimited sections with their content paragraphs
 * (page-level variant used by scoreDuplicateContentDetailed).
 *
 * The HTML is cut in front of every <h2>/<h3>. Each part becomes a section
 * whose heading is the tag-stripped heading text ("(intro)" for content that
 * precedes the first heading). Paragraphs are normalised to lower-cased plain
 * text and turned into 4-word shingles; paragraphs with fewer than 3 shingles
 * or that look like boilerplate are dropped, and sections left without
 * paragraphs are omitted.
 *
 * @param {string} html - Raw page HTML. Non-string or empty input yields [].
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function extractSectionsWithParagraphs(html) {
  // A page without a body must not abort scoring of the remaining criteria.
  if (typeof html !== "string" || html.length === 0) return [];
  const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  // The lookahead keeps each heading at the start of its own part.
  const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
  const sections = [];
  for (const part of parts) {
    const headingMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
    const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
    const paragraphs = pMatches.map((p) => {
      // Strip named AND numeric character references (&amp; &#39; &#x2019;);
      // the previous /&\w+;/ pattern left numeric ones embedded in the tokens.
      const text = p.replace(/<[^>]*>/g, " ").replace(/&(?:#\d+|#x[0-9a-f]+|\w+);/gi, " ").replace(/\s+/g, " ").trim().toLowerCase();
      return { text, shingles: buildShingles(text, 4) };
    }).filter((p) => p.shingles.size >= 3 && !isBoilerplate(p.text));
    if (paragraphs.length > 0) {
      sections.push({ heading, paragraphs });
    }
  }
  return sections;
}
|
|
4528
|
+
/**
 * Build the set of word n-grams ("shingles") for a piece of text.
 *
 * The text is split on whitespace and tokens of length 0 or 1 are ignored.
 *
 * @param {string} text - Text to shingle.
 * @param {number} n - Number of consecutive tokens per shingle.
 * @returns {Set<string>} Distinct shingles, each joined with single spaces.
 */
function buildShingles(text, n) {
  const kept = text.split(/\s+/).filter((word) => word.length > 1);
  const grams = /* @__PURE__ */ new Set();
  const finalOffset = kept.length - n;
  for (let offset = 0; offset <= finalOffset; offset += 1) {
    const gram = kept.slice(offset, offset + n);
    grams.add(gram.join(" "));
  }
  return grams;
}
|
|
4536
|
+
/**
 * Jaccard index of two shingle sets: size of the intersection divided by the
 * size of the union.
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function shingleJaccard(a, b) {
  const bothEmpty = a.size === 0 && b.size === 0;
  if (bothEmpty) return 0;
  const overlap = [...a].reduce((count, shingle) => (b.has(shingle) ? count + 1 : count), 0);
  const unionSize = a.size + b.size - overlap;
  return unionSize === 0 ? 0 : overlap / unionSize;
}
|
|
4199
4545
|
var SCORING_FUNCTIONS = {
|
|
4200
4546
|
schema_markup: scoreSchemaMarkup,
|
|
4201
4547
|
qa_content_format: scoreQAFormat,
|
|
@@ -4216,7 +4562,8 @@ var SCORING_FUNCTIONS = {
|
|
|
4216
4562
|
evidence_packaging: scoreEvidencePackaging,
|
|
4217
4563
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4218
4564
|
extraction_friction: scoreExtractionFriction,
|
|
4219
|
-
image_context_ai: scoreImageContextAI
|
|
4565
|
+
image_context_ai: scoreImageContextAI,
|
|
4566
|
+
duplicate_content: scoreDuplicateContent
|
|
4220
4567
|
};
|
|
4221
4568
|
function scorePage(html, url) {
|
|
4222
4569
|
let totalWeight = 0;
|
|
@@ -4230,6 +4577,11 @@ function scorePage(html, url) {
|
|
|
4230
4577
|
totalWeight += weight;
|
|
4231
4578
|
}
|
|
4232
4579
|
let aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
|
|
4580
|
+
const dupScore = criterionScores.find((c) => c.criterion === "duplicate_content")?.score ?? 10;
|
|
4581
|
+
if (dupScore <= 6) {
|
|
4582
|
+
const dupCap = 35 + dupScore * 5;
|
|
4583
|
+
aeoScore = Math.min(aeoScore, dupCap);
|
|
4584
|
+
}
|
|
4233
4585
|
const scoreCapped = aeoScore > 75;
|
|
4234
4586
|
if (scoreCapped) aeoScore = 75;
|
|
4235
4587
|
return { aeoScore, criterionScores, scoreCapped };
|
|
@@ -4449,6 +4801,15 @@ function checkHasCitationReadyContent(html) {
|
|
|
4449
4801
|
}
|
|
4450
4802
|
return null;
|
|
4451
4803
|
}
|
|
4804
|
+
/**
 * Page issue check: report duplicated content blocks within a single page.
 *
 * An issue is produced only when the detailed duplicate score is 6 or lower
 * and at least one duplicate was recorded. The label describes the first
 * duplicate (and the total count when there are several); severity is
 * "error" for scores of 3 or lower, otherwise "warning".
 *
 * @param {string} html - Raw page HTML.
 * @returns {{check: string, label: string, severity: string} | null}
 */
function checkDuplicateContentBlocks(html) {
  const { score, duplicates } = scoreDuplicateContentDetailed(html);
  const reportable = score <= 6 && duplicates.length > 0;
  if (!reportable) return null;

  const [first] = duplicates;
  let label;
  if (duplicates.length === 1) {
    label = `Duplicate content: '${first.headingA}' and '${first.headingB}' share ${first.similarity}% similar text ("${first.sample}...")`;
  } else {
    label = `${duplicates.length} duplicate blocks found (e.g. '${first.headingA}' and '${first.headingB}' \u2014 "${first.sample}...")`;
  }
  const severity = score <= 3 ? "error" : "warning";
  return { check: "duplicate-content", label, severity };
}
|
|
4452
4813
|
function analyzePage(html, url, category) {
|
|
4453
4814
|
const title = extractTitle(html);
|
|
4454
4815
|
const textContent = getTextContent2(html);
|
|
@@ -4467,7 +4828,8 @@ function analyzePage(html, url, category) {
|
|
|
4467
4828
|
checkImagesMissingAlt(html),
|
|
4468
4829
|
checkNoInternalLinks(html, url),
|
|
4469
4830
|
checkNoAnswerBlock(html),
|
|
4470
|
-
checkNoEvidence(html, url)
|
|
4831
|
+
checkNoEvidence(html, url),
|
|
4832
|
+
checkDuplicateContentBlocks(html)
|
|
4471
4833
|
];
|
|
4472
4834
|
for (const result of issueChecks) {
|
|
4473
4835
|
if (result) issues.push(result);
|
|
@@ -4535,7 +4897,7 @@ async function audit(domain, options) {
|
|
|
4535
4897
|
}
|
|
4536
4898
|
}
|
|
4537
4899
|
if (options?.fullCrawl) {
|
|
4538
|
-
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-
|
|
4900
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-TQ35TB2X.js");
|
|
4539
4901
|
const crawlResult = await crawlFullSite2(siteData, {
|
|
4540
4902
|
maxPages: options.maxPages ?? 200,
|
|
4541
4903
|
concurrency: options.concurrency ?? 5
|
|
@@ -4830,9 +5192,9 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4830
5192
|
content_depth: 0.07,
|
|
4831
5193
|
fact_density: 0.06,
|
|
4832
5194
|
direct_answer_density: 0.05,
|
|
4833
|
-
qa_content_format: 0.
|
|
4834
|
-
query_answer_alignment: 0.
|
|
4835
|
-
faq_section: 0.
|
|
5195
|
+
qa_content_format: 0.04,
|
|
5196
|
+
query_answer_alignment: 0.04,
|
|
5197
|
+
faq_section: 0.03,
|
|
4836
5198
|
// Content Organization (~30%)
|
|
4837
5199
|
entity_consistency: 0.05,
|
|
4838
5200
|
internal_linking: 0.04,
|
|
@@ -4846,6 +5208,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4846
5208
|
clean_html: 0.02,
|
|
4847
5209
|
// Technical Plumbing (~15%)
|
|
4848
5210
|
content_cannibalization: 0.02,
|
|
5211
|
+
duplicate_content: 0.05,
|
|
5212
|
+
cross_page_duplication: 0.03,
|
|
4849
5213
|
llms_txt: 0.02,
|
|
4850
5214
|
robots_txt: 0.02,
|
|
4851
5215
|
content_velocity: 0.02,
|
|
@@ -4890,7 +5254,9 @@ var PHASE_CONFIG = [
|
|
|
4890
5254
|
"citation_ready_writing",
|
|
4891
5255
|
"answer_first_placement",
|
|
4892
5256
|
"evidence_packaging",
|
|
4893
|
-
"entity_disambiguation"
|
|
5257
|
+
"entity_disambiguation",
|
|
5258
|
+
"duplicate_content",
|
|
5259
|
+
"cross_page_duplication"
|
|
4894
5260
|
]
|
|
4895
5261
|
},
|
|
4896
5262
|
{
|
|
@@ -5770,6 +6136,66 @@ Summarization: yes`,
|
|
|
5770
6136
|
}
|
|
5771
6137
|
return fixes;
|
|
5772
6138
|
},
|
|
6139
|
+
duplicate_content: (c, pages) => {
|
|
6140
|
+
if (c.score >= 10) return [];
|
|
6141
|
+
const impact = impactFromScore(c.score);
|
|
6142
|
+
const effort = effortForCriterion("duplicate_content", c.score);
|
|
6143
|
+
const affected = getAffectedPages("duplicate_content", pages);
|
|
6144
|
+
const sectionPairs = c.findings.filter((f) => f.detail.includes("' and '")).map((f) => {
|
|
6145
|
+
const match = f.detail.match(/'([^']+)' and '([^']+)'/);
|
|
6146
|
+
return match ? { a: match[1], b: match[2] } : null;
|
|
6147
|
+
}).filter(Boolean);
|
|
6148
|
+
const steps = [
|
|
6149
|
+
"Identify sections with duplicate or near-identical text",
|
|
6150
|
+
"Rewrite each section to provide a unique angle on the topic",
|
|
6151
|
+
"Ensure each heading section adds new information for the reader"
|
|
6152
|
+
];
|
|
6153
|
+
if (sectionPairs.length > 0) {
|
|
6154
|
+
const pair = sectionPairs[0];
|
|
6155
|
+
steps.unshift(`Start with '${pair.a}' and '${pair.b}' which share similar text`);
|
|
6156
|
+
}
|
|
6157
|
+
return [{
|
|
6158
|
+
id: "fix-duplicate-content",
|
|
6159
|
+
criterion: c.criterion_label,
|
|
6160
|
+
criterionId: c.criterion,
|
|
6161
|
+
title: "Fix duplicate content blocks",
|
|
6162
|
+
description: "Sections within pages contain identical or near-identical text. LLMs may flag this as low-quality content, reducing the authority of the page.",
|
|
6163
|
+
impact,
|
|
6164
|
+
effort,
|
|
6165
|
+
impactScore: 0,
|
|
6166
|
+
category: "content",
|
|
6167
|
+
steps,
|
|
6168
|
+
successCriteria: "Each section within a page provides unique content",
|
|
6169
|
+
affectedPages: affected,
|
|
6170
|
+
pageCount: affected?.length
|
|
6171
|
+
}];
|
|
6172
|
+
},
|
|
6173
|
+
cross_page_duplication: (c, pages) => {
|
|
6174
|
+
if (c.score >= 10) return [];
|
|
6175
|
+
const impact = impactFromScore(c.score);
|
|
6176
|
+
const effort = effortForCriterion("cross_page_duplication", c.score);
|
|
6177
|
+
const affected = getAffectedPages("cross_page_duplication", pages);
|
|
6178
|
+
return [{
|
|
6179
|
+
id: "fix-cross-page-duplication",
|
|
6180
|
+
criterion: c.criterion_label,
|
|
6181
|
+
criterionId: c.criterion,
|
|
6182
|
+
title: "Eliminate cross-page duplicate content",
|
|
6183
|
+
description: "The same paragraphs appear on multiple pages. AI engines may only index one version, wasting the others.",
|
|
6184
|
+
impact,
|
|
6185
|
+
effort,
|
|
6186
|
+
impactScore: 0,
|
|
6187
|
+
category: "content",
|
|
6188
|
+
steps: [
|
|
6189
|
+
"Identify paragraphs that are copy-pasted across multiple pages",
|
|
6190
|
+
"Rewrite each instance to provide a unique angle relevant to that page",
|
|
6191
|
+
"Move truly shared content to a single resource page and link to it",
|
|
6192
|
+
"Use canonical tags if pages must share content"
|
|
6193
|
+
],
|
|
6194
|
+
successCriteria: "Each page has unique body content with no copy-pasted paragraphs",
|
|
6195
|
+
affectedPages: affected,
|
|
6196
|
+
pageCount: affected?.length
|
|
6197
|
+
}];
|
|
6198
|
+
},
|
|
5773
6199
|
visible_date_signal: (c, pages) => {
|
|
5774
6200
|
if (c.score >= 10) return [];
|
|
5775
6201
|
const impact = impactFromScore(c.score);
|