aeorank 3.0.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # AEORank
2
2
 
3
- Score any website for AI engine visibility across 34 criteria in a 5-pillar framework. Pure HTTP + regex - zero API keys, under 10 seconds.
3
+ Score any website for AI engine visibility across 36 criteria in a 5-pillar framework. Pure HTTP + regex - zero API keys, under 10 seconds.
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/aeorank.svg)](https://www.npmjs.com/package/aeorank)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -35,7 +35,7 @@ import { audit } from 'aeorank';
35
35
 
36
36
  const result = await audit('example.com');
37
37
  console.log(result.overallScore); // 0-100
38
- console.log(result.scorecard); // 34 criteria with scores, pillars, weights
38
+ console.log(result.scorecard); // 36 criteria with scores, pillars, weights
39
39
  console.log(result.pillarScores); // { answerReadiness, contentStructure, ... }
40
40
  console.log(result.topFixes); // Top 3 highest-impact fixes
41
41
  console.log(result.opportunities); // Prioritized improvements
@@ -43,7 +43,7 @@ console.log(result.opportunities); // Prioritized improvements
43
43
 
44
44
  ## What It Checks
45
45
 
46
- AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Perplexity, Google AI Overviews) discover, parse, and cite your content. Criteria are organized into five pillars:
46
+ AEORank evaluates 36 criteria that determine how AI engines (ChatGPT, Claude, Perplexity, Google AI Overviews) discover, parse, and cite your content. Criteria are organized into five pillars:
47
47
 
48
48
  ### 5-Pillar Framework
49
49
 
@@ -55,7 +55,9 @@ AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Pe
55
55
  | Original Data & Expert Analysis | 10% | Proprietary research, case studies, unique data points |
56
56
  | Content Depth | 7% | Article length, heading structure, deep vs thin pages |
57
57
  | Fact & Data Density | 6% | Specific numbers, statistics, data points per page |
58
+ | Duplicate Content Blocks | 5% | Identical text blocks repeated across sections within a page |
58
59
  | Citation-Ready Writing | 4% | Self-contained definition sentences, single-claim statements |
60
+ | Cross-Page Duplicate Content | 3% | Same paragraphs copy-pasted across multiple pages |
59
61
  | Answer-First Placement | 3% | Answer block in first 300 words, no throat-clearing openers |
60
62
  | Evidence Packaging | 3% | Inline citations, attribution phrases, sources sections |
61
63
 
@@ -64,9 +66,9 @@ AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Pe
64
66
  | Criterion | Weight | What it measures |
65
67
  |-----------|--------|------------------|
66
68
  | Direct Answer Paragraphs | 5% | Concise answer paragraphs after question headings |
67
- | Q&A Content Format | 5% | Question-format headings (What, How, Why) with answers |
68
- | Query-Answer Alignment | 5% | Every question heading followed by a direct answer |
69
- | Comprehensive FAQ Section | 4% | Dedicated FAQ with FAQPage schema markup |
69
+ | Q&A Content Format | 4% | Question-format headings (What, How, Why) with answers |
70
+ | Query-Answer Alignment | 4% | Every question heading followed by a direct answer |
71
+ | Comprehensive FAQ Section | 3% | Dedicated FAQ with FAQPage schema markup |
70
72
  | Table & List Extractability | 3% | HTML tables with headers, ordered/unordered lists |
71
73
  | Definition Patterns | 2% | Clear "X is defined as..." patterns for key terms |
72
74
  | Entity Disambiguation | 2% | Primary entity defined early, consistent terminology |
@@ -107,19 +109,21 @@ AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Pe
107
109
  | RSS/Atom Feed | 1% | RSS feed linked from homepage |
108
110
 
109
111
  > **Coherence Gate:** Sites with topic coherence below 6/10 are score-capped regardless of technical perfection. A scattered site with perfect robots.txt, llms.txt, and schema will score lower than a focused site with mediocre technical implementation.
112
+ >
113
+ > **Duplication Gate:** Per-page scores are capped when duplicate content blocks are detected. A page with 3+ identical copy-pasted paragraphs cannot score above 35/75 regardless of other signals — LLMs will flag it as low-quality content.
110
114
 
111
115
  <details>
112
- <summary>All 34 criteria (numbered list)</summary>
116
+ <summary>All 36 criteria (numbered list)</summary>
113
117
 
114
118
  | # | Criterion | Weight | Pillar |
115
119
  |---|-----------|--------|--------|
116
120
  | 1 | llms.txt File | 2% | AI Discovery |
117
121
  | 2 | Schema.org Structured Data | 3% | Trust & Authority |
118
- | 3 | Q&A Content Format | 5% | Content Structure |
122
+ | 3 | Q&A Content Format | 4% | Content Structure |
119
123
  | 4 | Clean, Crawlable HTML | 2% | Technical Foundation |
120
124
  | 5 | Entity Authority & NAP Consistency | 5% | Trust & Authority |
121
125
  | 6 | robots.txt for AI Crawlers | 2% | AI Discovery |
122
- | 7 | Comprehensive FAQ Section | 4% | Content Structure |
126
+ | 7 | Comprehensive FAQ Section | 3% | Content Structure |
123
127
  | 8 | Original Data & Expert Analysis | 10% | Answer Readiness |
124
128
  | 9 | Internal Linking Structure | 4% | Trust & Authority |
125
129
  | 10 | Semantic HTML5 & Accessibility | 2% | Technical Foundation |
@@ -136,7 +140,7 @@ AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Pe
136
140
  | 21 | Content Publishing Velocity | 2% | AI Discovery |
137
141
  | 22 | Schema Coverage & Depth | 1% | Technical Foundation |
138
142
  | 23 | Speakable Schema | 1% | Technical Foundation |
139
- | 24 | Query-Answer Alignment | 5% | Content Structure |
143
+ | 24 | Query-Answer Alignment | 4% | Content Structure |
140
144
  | 25 | Content Cannibalization | 2% | AI Discovery |
141
145
  | 26 | Visible Date Signal | 2% | Technical Foundation |
142
146
  | 27 | Topic Coherence | 14% | Answer Readiness |
@@ -147,6 +151,8 @@ AEORank evaluates 34 criteria that determine how AI engines (ChatGPT, Claude, Pe
147
151
  | 32 | Entity Disambiguation | 2% | Content Structure |
148
152
  | 33 | Extraction Friction | 2% | Technical Foundation |
149
153
  | 34 | Image Context for AI | 1% | Technical Foundation |
154
+ | 35 | Duplicate Content Blocks | 5% | Answer Readiness |
155
+ | 36 | Cross-Page Duplicate Content | 3% | Answer Readiness |
150
156
 
151
157
  </details>
152
158
 
@@ -197,7 +203,7 @@ Or use `npx` directly:
197
203
  Run a complete audit. Returns `AuditResult` with:
198
204
 
199
205
  - `overallScore` - 0-100 weighted score
200
- - `scorecard` - 28 `ScoreCardItem` entries (criterion, score 0-10, status, key findings)
206
+ - `scorecard` - 36 `ScoreCardItem` entries (criterion, score 0-10, status, key findings)
201
207
  - `detailedFindings` - Per-criterion findings with severity
202
208
  - `opportunities` - Prioritized improvements with effort/impact
203
209
  - `pitchNumbers` - Key metrics (schema types, AI crawler access, etc.)
@@ -219,10 +225,10 @@ Run a complete audit. Returns `AuditResult` with:
219
225
 
220
226
  ### `scorePage(html, url?)`
221
227
 
222
- Score a single HTML page against 14 per-page AEO criteria. Returns `PageScoreResult` with:
228
+ Score a single HTML page against 21 per-page AEO criteria. Returns `PageScoreResult` with:
223
229
 
224
- - `aeoScore` - 0-100 weighted score
225
- - `criterionScores` - 14 `PageCriterionScore` entries (criterion, score 0-10, weight)
230
+ - `aeoScore` - 0-75 weighted score (single-page cap at 75; the duplication gate may lower the cap further)
231
+ - `criterionScores` - 21 `PageCriterionScore` entries (criterion, score 0-10, weight)
226
232
 
227
233
  ### `scoreAllPages(siteData)`
228
234
 
@@ -382,21 +388,22 @@ console.log(crawlResult.discoveredUrls.length); // Total URLs found
382
388
 
383
389
  ## Per-Page Scoring
384
390
 
385
- AEORank scores each individual page (0-75) against the 20 criteria that apply at page level. Instead of only seeing "your site scores 62," you get "your /about page scores 45, your /blog/guide scores 72."
391
+ AEORank scores each individual page (0-75) against the 21 criteria that apply at page level. Instead of only seeing "your site scores 62," you get "your /about page scores 45, your /blog/guide scores 72."
386
392
 
387
- The 20 per-page criteria follow the same pillar-first weighting as the site-level score:
393
+ The 21 per-page criteria follow the same pillar-first weighting as the site-level score:
388
394
 
389
395
  | Pillar | Per-Page Criteria | Weight |
390
396
  |--------|-------------------|--------|
391
397
  | **Answer Readiness** | Original Data & Expert Content | 10% |
392
398
  | | Fact & Data Density | 6% |
399
+ | | Duplicate Content Blocks | 5% |
393
400
  | | Citation-Ready Writing | 4% |
394
401
  | | Answer-First Placement | 3% |
395
402
  | | Evidence Packaging | 3% |
396
403
  | **Content Structure** | Direct Answer Paragraphs | 5% |
397
- | | Q&A Content Format | 5% |
398
- | | Query-Answer Alignment | 5% |
399
- | | FAQ Section Content | 4% |
404
+ | | Q&A Content Format | 4% |
405
+ | | Query-Answer Alignment | 4% |
406
+ | | FAQ Section Content | 3% |
400
407
  | | Table & List Extractability | 3% |
401
408
  | | Definition Patterns | 2% |
402
409
  | | Entity Disambiguation | 2% |
@@ -409,9 +416,11 @@ The 20 per-page criteria follow the same pillar-first weighting as the site-leve
409
416
  | | Image Context for AI | 1% |
410
417
  | **AI Discovery** | Canonical URL Strategy | 1% |
411
418
 
412
- The remaining 14 criteria are site-level only: llms.txt, robots.txt, sitemap, RSS, entity consistency, internal linking, content licensing, author schema, content velocity, schema coverage, speakable schema, content cannibalization, topic coherence, and content depth.
419
+ The remaining 15 criteria are site-level only: llms.txt, robots.txt, sitemap, RSS, entity consistency, internal linking, content licensing, author schema, content velocity, schema coverage, speakable schema, content cannibalization, cross-page duplication, topic coherence, and content depth.
413
420
 
414
421
  > **Single-page cap:** Per-page scores are capped at 75 since single pages cannot demonstrate site-wide signals like topic coherence, content velocity, or sitemap completeness.
422
+ >
423
+ > **Duplication gate:** Pages with significant duplicate content blocks are score-capped. A page with 3+ copy-pasted paragraphs is capped at 35/75 — LLMs treat repeated content as low-quality regardless of other signals.
415
424
 
416
425
  ### CLI Output
417
426
 
@@ -436,7 +445,7 @@ import type { PageScoreResult, PageCriterionScore } from 'aeorank';
436
445
  // Score a single page
437
446
  const result = scorePage(html, url);
438
447
  console.log(result.aeoScore); // 0-75 (capped for single pages)
439
- console.log(result.criterionScores); // 20 per-criterion scores
448
+ console.log(result.criterionScores); // 21 per-criterion scores
440
449
  console.log(result.scoreCapped); // true if score was capped at 75
441
450
 
442
451
  // Score all pages from site data
@@ -565,9 +574,13 @@ console.log(result.comparison.tied); // Criteria with equal scores
565
574
 
566
575
  ## Changelog
567
576
 
577
+ ### v3.1.0 - Duplicate Content Detection
578
+
579
+ 2 new criteria (#35-#36): Duplicate Content Blocks (intra-page, 5%) and Cross-Page Duplicate Content (3%). Detects identical text blocks within pages and copy-pasted paragraphs across pages using shingle-based Jaccard similarity. Boilerplate filtering excludes CTAs, signups, and template content from duplicate detection to avoid false positives. Duplication gate caps per-page scores when severe duplication is found. CLI now shows duplicate section names inline per page.
580
+
568
581
  ### v3.0.0 - 5-Pillar Framework & 6 New Criteria
569
582
 
570
- Scoring Engine v2: 28 → 34 criteria with 5-pillar framework (Answer Readiness, Content Structure, Trust & Authority, Technical Foundation, AI Discovery). 6 new criteria targeting citation quality, evidence packaging, and extraction friction. Per-pillar sub-scores, top-3 fixes, client-friendly names. Single-page score cap at 75. 15 per-page quality checks (up from 12).
583
+ Scoring Engine v2: 28 → 34 criteria (now 36) with 5-pillar framework (Answer Readiness, Content Structure, Trust & Authority, Technical Foundation, AI Discovery). 6 new criteria targeting citation quality, evidence packaging, and extraction friction. Per-pillar sub-scores, top-3 fixes, client-friendly names. Single-page score cap at 75. 15 per-page quality checks (up from 12).
571
584
 
572
585
  ### v2.3.0 - Coherence Scaling & Script Stripping
573
586
 
@@ -595,7 +608,7 @@ Individual page scores (0-100) against 14 page-level criteria. Top/bottom page r
595
608
 
596
609
  ## Benchmark Dataset
597
610
 
598
- The `data/` directory contains the largest open dataset of AI visibility scores - **13,619 domains** scored across 34 criteria, including **4,328 Y Combinator startups** across 48 batches (W06-W26):
611
+ The `data/` directory contains the largest open dataset of AI visibility scores - **13,619 domains** scored across 36 criteria, including **4,328 Y Combinator startups** across 48 batches (W06-W26):
599
612
 
600
613
  | File | Contents |
601
614
  |------|----------|
package/dist/browser.d.ts CHANGED
@@ -64,7 +64,7 @@ declare function buildLinkGraph(pages: FetchResult[], domain: string, homepageUr
64
64
 
65
65
  /**
66
66
  * V2 Pillar Framework — 5-pillar scoring model.
67
- * Maps all 34 criteria into pillars, computes sub-scores,
67
+ * Maps all 36 criteria into pillars, computes sub-scores,
68
68
  * provides client-friendly names, and calculates top-3 fixes.
69
69
  */
70
70
 
@@ -320,7 +320,7 @@ interface SitemapDateAnalysis {
320
320
  declare function countRecentSitemapDates(sitemapText: string): SitemapDateAnalysis;
321
321
  declare function extractRawDataSummary(data: SiteData): RawDataSummary;
322
322
  /**
323
- * Run all 34 criteria checks using pre-fetched site data.
323
+ * Run all 36 criteria checks using pre-fetched site data.
324
324
  * All functions are synchronous (no HTTP calls) - data was already fetched.
325
325
  */
326
326
  declare function auditSiteFromData(data: SiteData): CriterionResult[];
@@ -456,7 +456,7 @@ declare function analyzeAllPages(siteData: SiteData): PageReview[];
456
456
 
457
457
  /**
458
458
  * Per-page AEO scoring.
459
- * Evaluates 20 of 34 criteria that apply at individual page level.
459
+ * Evaluates the 21 criteria (of 36 total) that apply at individual page level.
460
460
  * Produces a 0-75 AEO score per page (single-page cap at 75).
461
461
  */
462
462