aeorank 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -2
- package/dist/browser.js +28 -17
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +28 -17
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +28 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +28 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AEORank
|
|
2
2
|
|
|
3
|
-
Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys
|
|
3
|
+
Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys, under 10 seconds.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/aeorank)
|
|
6
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -309,9 +309,21 @@ npm install puppeteer
|
|
|
309
309
|
|
|
310
310
|
Use `--no-headless` to skip SPA rendering (faster but may produce lower scores for SPAs).
|
|
311
311
|
|
|
312
|
+
## Page Discovery
|
|
313
|
+
|
|
314
|
+
AEORank automatically discovers and scores pages beyond just the homepage:
|
|
315
|
+
|
|
316
|
+
1. **Sitemap blog sample** - Up to 50 blog/article pages from `sitemap.xml`
|
|
317
|
+
2. **Nav link extraction** - Internal links from `<nav>` elements
|
|
318
|
+
3. **Common page variants** - `/about`, `/pricing`, `/services`, `/contact`, `/team`, `/resources`, `/docs`, `/case-studies`
|
|
319
|
+
4. **Sitemap content pages** - 6 non-blog pages from sitemap (service pages, product pages)
|
|
320
|
+
5. **Homepage link fallback** (v2.2+) - When no sitemap exists (or fewer than 4 blog pages found), extracts up to 30 internal links from the full homepage HTML to build a page list automatically
|
|
321
|
+
|
|
322
|
+
This ensures realistic scoring even for sites without a sitemap. Without the fallback, sites with no sitemap were only getting 1-5 pages checked, inflating scores.
|
|
323
|
+
|
|
312
324
|
## Full-Site Crawl
|
|
313
325
|
|
|
314
|
-
|
|
326
|
+
For even deeper analysis beyond the automatic page discovery, enable `--full-crawl` to BFS-crawl every discoverable page:
|
|
315
327
|
|
|
316
328
|
```bash
|
|
317
329
|
npx aeorank example.com --full-crawl # Up to 200 pages
|
|
@@ -518,6 +530,34 @@ console.log(result.comparison.siteBAdvantages); // Criteria where B leads
|
|
|
518
530
|
console.log(result.comparison.tied); // Criteria with equal scores
|
|
519
531
|
```
|
|
520
532
|
|
|
533
|
+
## Changelog
|
|
534
|
+
|
|
535
|
+
### v2.3.0 - Coherence Scaling & Script Stripping
|
|
536
|
+
|
|
537
|
+
- **Topic coherence scales with page count**: Sites with many pages (50+) are no longer penalized for having more topic clusters. Cluster thresholds scale proportionally (pages/10, pages/5, pages/3). Absolute term presence (10+ pages) boosts focus score.
|
|
538
|
+
- **Strip inline JavaScript from scoring**: `<script>` and `<style>` tags are now removed before text analysis, preventing WP Rocket and similar deferred-loading scripts from corrupting regex-based scoring.
|
|
539
|
+
- **Regex safety net**: `checkQueryAnswerAlignment` wraps `new RegExp()` in try-catch to handle residual script content gracefully.
|
|
540
|
+
|
|
541
|
+
### v2.2.0 - Auto Page Discovery
|
|
542
|
+
|
|
543
|
+
Sites without a `sitemap.xml` now get up to 30 pages discovered from homepage links instead of 1-5. Prevents inflated scores from insufficient page coverage.
|
|
544
|
+
|
|
545
|
+
### v2.1.0 - Scoring Rebalance with Coherence Gate
|
|
546
|
+
|
|
547
|
+
Weight distribution redesigned: Content Substance ~55%, Organization ~30%, Plumbing ~15%. Coherence gate caps scores when topic focus is below 6/10.
|
|
548
|
+
|
|
549
|
+
### v2.0.0 - Topic Coherence & Content Depth
|
|
550
|
+
|
|
551
|
+
Added 2 new criteria (26 -> 28): Topic Coherence (14%) and Content Depth (7%). Blog sampling for coherence analysis.
|
|
552
|
+
|
|
553
|
+
### v1.6.0 - Link Graph & Fix Plan Engine
|
|
554
|
+
|
|
555
|
+
Internal linking analysis with orphan/pillar/hub detection, topic clusters. Phased fix plan generation with code examples.
|
|
556
|
+
|
|
557
|
+
### v1.5.0 - Per-Page Scoring
|
|
558
|
+
|
|
559
|
+
Individual page scores (0-100) against 14 page-level criteria. Top/bottom page rankings.
|
|
560
|
+
|
|
521
561
|
## Benchmark Dataset
|
|
522
562
|
|
|
523
563
|
The `data/` directory contains the largest open dataset of AI visibility scores - **13,619 domains** scored across 28 criteria, including **4,328 Y Combinator startups** across 48 batches (W06-W26):
|
package/dist/browser.js
CHANGED
|
@@ -209,18 +209,21 @@ async function prefetchSiteData(domain) {
|
|
|
209
209
|
if (homepage) homepage.category = "homepage";
|
|
210
210
|
return { domain, protocol, homepage, llmsTxt, robotsTxt, faqPage, sitemapXml, rssFeed, aiTxt, redirectedTo: null, parkedReason: null, blogSample };
|
|
211
211
|
}
|
|
212
|
+
function stripScripts(html) {
|
|
213
|
+
return html.replace(/<script(?![^>]*type\s*=\s*["']application\/ld\+json["'])[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
|
|
214
|
+
}
|
|
212
215
|
function getCombinedHtml(data) {
|
|
213
|
-
const parts = [data.homepage?.text || ""];
|
|
216
|
+
const parts = [stripScripts(data.homepage?.text || "")];
|
|
214
217
|
if (data.blogSample) {
|
|
215
218
|
for (const page of data.blogSample) {
|
|
216
|
-
parts.push(page.text);
|
|
219
|
+
parts.push(stripScripts(page.text));
|
|
217
220
|
}
|
|
218
221
|
}
|
|
219
222
|
return parts.join("\n");
|
|
220
223
|
}
|
|
221
224
|
function getBlogHtml(data) {
|
|
222
225
|
if (!data.blogSample || data.blogSample.length === 0) return "";
|
|
223
|
-
return data.blogSample.map((p) => p.text).join("\n");
|
|
226
|
+
return data.blogSample.map((p) => stripScripts(p.text)).join("\n");
|
|
224
227
|
}
|
|
225
228
|
function checkLlmsTxt(data) {
|
|
226
229
|
const findings = [];
|
|
@@ -1418,14 +1421,17 @@ function checkQueryAnswerAlignment(data) {
|
|
|
1418
1421
|
}
|
|
1419
1422
|
let answered = 0;
|
|
1420
1423
|
for (const qHeading of questionHeadings) {
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1424
|
+
try {
|
|
1425
|
+
const escapedHeading = qHeading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1426
|
+
const pattern = new RegExp(escapedHeading + "[\\s\\S]{0,200}?<\\/h[23]>([\\s\\S]{0,1500}?)(?=<h[1-6]|$)", "i");
|
|
1427
|
+
const match = pattern.exec(combinedHtml);
|
|
1428
|
+
if (match) {
|
|
1429
|
+
const afterContent = match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
1430
|
+
if (afterContent.length >= 20) {
|
|
1431
|
+
answered++;
|
|
1432
|
+
}
|
|
1428
1433
|
}
|
|
1434
|
+
} catch {
|
|
1429
1435
|
}
|
|
1430
1436
|
}
|
|
1431
1437
|
const rate = Math.round(answered / questionHeadings.length * 100);
|
|
@@ -1904,27 +1910,32 @@ function checkTopicCoherence(data) {
|
|
|
1904
1910
|
const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
|
|
1905
1911
|
let score = 0;
|
|
1906
1912
|
const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
|
|
1913
|
+
const dominantPageCount = Math.max(dominantTermCount, dominantBigramCount);
|
|
1914
|
+
const hasStrongAbsolutePresence = dominantPageCount >= 10;
|
|
1907
1915
|
if (bestFocusRatio >= 0.8) {
|
|
1908
1916
|
score += 7;
|
|
1909
1917
|
} else if (bestFocusRatio >= 0.6) {
|
|
1910
1918
|
score += 6;
|
|
1911
|
-
} else if (bestFocusRatio >= 0.45) {
|
|
1919
|
+
} else if (bestFocusRatio >= 0.45 || hasStrongAbsolutePresence && bestFocusRatio >= 0.3) {
|
|
1912
1920
|
score += 5;
|
|
1913
|
-
} else if (bestFocusRatio >= 0.3) {
|
|
1914
|
-
score +=
|
|
1921
|
+
} else if (bestFocusRatio >= 0.3 || hasStrongAbsolutePresence && bestFocusRatio >= 0.2) {
|
|
1922
|
+
score += 4;
|
|
1915
1923
|
} else if (bestFocusRatio >= 0.15) {
|
|
1916
1924
|
score += 2;
|
|
1917
1925
|
} else {
|
|
1918
1926
|
score += 1;
|
|
1919
1927
|
}
|
|
1920
|
-
const clusterPenaltyReduced = focusRatio >= 0.7;
|
|
1921
|
-
|
|
1928
|
+
const clusterPenaltyReduced = focusRatio >= 0.7 || hasStrongAbsolutePresence;
|
|
1929
|
+
const scaledLow = Math.max(3, Math.floor(blogPages.length / 10));
|
|
1930
|
+
const scaledMid = Math.max(6, Math.floor(blogPages.length / 5));
|
|
1931
|
+
const scaledHigh = Math.max(10, Math.floor(blogPages.length / 3));
|
|
1932
|
+
if (topicClusterCount <= scaledLow) {
|
|
1922
1933
|
score += 3;
|
|
1923
1934
|
findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
|
|
1924
|
-
} else if (topicClusterCount <=
|
|
1935
|
+
} else if (topicClusterCount <= scaledMid) {
|
|
1925
1936
|
score += clusterPenaltyReduced ? 2 : 1;
|
|
1926
1937
|
findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
|
|
1927
|
-
} else if (topicClusterCount <=
|
|
1938
|
+
} else if (topicClusterCount <= scaledHigh) {
|
|
1928
1939
|
score += clusterPenaltyReduced ? 1 : 0;
|
|
1929
1940
|
if (!clusterPenaltyReduced) {
|
|
1930
1941
|
findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
|