aeorank 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # AEORank
2
2
 
3
- Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys required.
3
+ Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys, under 10 seconds.
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/aeorank.svg)](https://www.npmjs.com/package/aeorank)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -309,9 +309,21 @@ npm install puppeteer
309
309
 
310
310
  Use `--no-headless` to skip SPA rendering (faster but may produce lower scores for SPAs).
311
311
 
312
+ ## Page Discovery
313
+
314
+ AEORank automatically discovers and scores pages beyond just the homepage:
315
+
316
+ 1. **Sitemap blog sample** - Up to 50 blog/article pages from `sitemap.xml`
317
+ 2. **Nav link extraction** - Internal links from `<nav>` elements
318
+ 3. **Common page variants** - `/about`, `/pricing`, `/services`, `/contact`, `/team`, `/resources`, `/docs`, `/case-studies`
319
+ 4. **Sitemap content pages** - 6 non-blog pages from sitemap (service pages, product pages)
320
+ 5. **Homepage link fallback** (v2.2+) - When no sitemap exists (or fewer than 4 blog pages are found), AEORank extracts up to 30 internal links from the full homepage HTML to build a page list automatically
321
+
322
+ This ensures realistic scoring even for sites without a sitemap. Without the fallback, sites with no sitemap had only 1-5 pages checked, which inflated their scores.
323
+
312
324
  ## Full-Site Crawl
313
325
 
314
- By default, AEORank audits the homepage plus up to 50 blog pages from the sitemap. For deeper analysis, enable `--full-crawl` to BFS-crawl every discoverable page:
326
+ For even deeper analysis beyond the automatic page discovery, enable `--full-crawl` to BFS-crawl every discoverable page:
315
327
 
316
328
  ```bash
317
329
  npx aeorank example.com --full-crawl # Up to 200 pages
@@ -518,6 +530,34 @@ console.log(result.comparison.siteBAdvantages); // Criteria where B leads
518
530
  console.log(result.comparison.tied); // Criteria with equal scores
519
531
  ```
520
532
 
533
+ ## Changelog
534
+
535
+ ### v2.3.0 - Coherence Scaling & Script Stripping
536
+
537
+ - **Topic coherence scales with page count**: Sites with many pages (50+) are no longer penalized for having more topic clusters. Cluster thresholds scale proportionally with the number of sampled pages (pages/10, pages/5, pages/3), with minimums of 3, 6, and 10 clusters. A dominant term or bigram that appears on 10+ pages also boosts the focus score.
538
+ - **Strip inline JavaScript from scoring**: `<script>` and `<style>` tags are now removed before text analysis, preventing WP Rocket and similar deferred-loading scripts from corrupting regex-based scoring.
539
+ - **Regex safety net**: `checkQueryAnswerAlignment` wraps `new RegExp()` in try-catch to handle residual script content gracefully.
540
+
541
+ ### v2.2.0 - Auto Page Discovery
542
+
543
+ Sites without a `sitemap.xml` now get up to 30 pages discovered from homepage links instead of 1-5. This prevents inflated scores caused by insufficient page coverage.
544
+
545
+ ### v2.1.0 - Scoring Rebalance with Coherence Gate
546
+
547
+ Weight distribution redesigned: Content Substance ~55%, Organization ~30%, Plumbing ~15%. Coherence gate caps scores when topic focus is below 6/10.
548
+
549
+ ### v2.0.0 - Topic Coherence & Content Depth
550
+
551
+ Added 2 new criteria (26 -> 28): Topic Coherence (14%) and Content Depth (7%). Blog sampling for coherence analysis.
552
+
553
+ ### v1.6.0 - Link Graph & Fix Plan Engine
554
+
555
+ Internal linking analysis with orphan/pillar/hub detection, topic clusters. Phased fix plan generation with code examples.
556
+
557
+ ### v1.5.0 - Per-Page Scoring
558
+
559
+ Individual page scores (0-100) against 14 page-level criteria. Top/bottom page rankings.
560
+
521
561
  ## Benchmark Dataset
522
562
 
523
563
  The `data/` directory contains the largest open dataset of AI visibility scores - **13,619 domains** scored across 28 criteria, including **4,328 Y Combinator startups** across 48 batches (W06-W26):
package/dist/browser.d.ts CHANGED
@@ -262,12 +262,25 @@ interface RawDataSummary {
262
262
  * Single entry point for all HTTP requests - no redundant fetches.
263
263
  */
264
264
  declare function prefetchSiteData(domain: string): Promise<SiteData>;
265
+ interface SitemapDateAnalysis {
266
+ recentCount: number;
267
+ isUniform: boolean;
268
+ uniformDetail?: string;
269
+ totalWithDates: number;
270
+ distinctRecentDays: number;
271
+ }
272
+ declare function countRecentSitemapDates(sitemapText: string): SitemapDateAnalysis;
265
273
  declare function extractRawDataSummary(data: SiteData): RawDataSummary;
266
274
  /**
267
275
  * Run all 28 criteria checks using pre-fetched site data.
268
276
  * All functions are synchronous (no HTTP calls) - data was already fetched.
269
277
  */
270
278
  declare function auditSiteFromData(data: SiteData): CriterionResult[];
279
+ /**
280
+ * Legacy entry point: fetches data and runs all checks.
281
+ * Used by analyzer.ts for the /api/aeo/analyze endpoint.
282
+ */
283
+ declare function auditSite(targetUrl: string): Promise<CriterionResult[]>;
271
284
 
272
285
  declare function calculateOverallScore(criteria: CriterionResult[]): number;
273
286
 
@@ -521,4 +534,4 @@ interface ComparisonResult {
521
534
  */
522
535
  declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
523
536
 
524
- export { type AuditData, type AuditFinding, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type FixAction, type FixPhase, type FixPlan, type FixPlanSummary, type ImpactLevel, type LinkEdge, type LinkGraph, type LinkGraphStats, type PageCategory$1 as PageCategory, type PageCriterionScore$1 as PageCriterionScore, type PageIssue, type PageNode, type PageReview, type PageScoreResult, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type ScoreCardItem, type SerializedLinkGraph, type Severity, type SiteData, type Status, type TopicCluster, analyzeAllPages, analyzePage, auditSiteFromData, buildDetailedFindings, buildLinkGraph, buildScorecard, calculateOverallScore, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractLinksWithAnchors, extractNavLinks, extractRawDataSummary, fetchMultiPageData, generateBottomLine, generateFixPlan, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, prefetchSiteData, scoreAllPages, scorePage, scoreToStatus, serializeLinkGraph };
537
+ export { type AuditData, type AuditFinding, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type FixAction, type FixPhase, type FixPlan, type FixPlanSummary, type ImpactLevel, type LinkEdge, type LinkGraph, type LinkGraphStats, type PageCategory$1 as PageCategory, type PageCriterionScore$1 as PageCriterionScore, type PageIssue, type PageNode, type PageReview, type PageScoreResult, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type ScoreCardItem, type SerializedLinkGraph, type Severity, type SiteData, type SitemapDateAnalysis, type Status, type TopicCluster, analyzeAllPages, analyzePage, auditSite, auditSiteFromData, buildDetailedFindings, buildLinkGraph, buildScorecard, calculateOverallScore, compare, countRecentSitemapDates, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractLinksWithAnchors, extractNavLinks, extractRawDataSummary, fetchMultiPageData, generateBottomLine, generateFixPlan, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, prefetchSiteData, scoreAllPages, scorePage, scoreToStatus, serializeLinkGraph };
package/dist/browser.js CHANGED
@@ -209,18 +209,21 @@ async function prefetchSiteData(domain) {
209
209
  if (homepage) homepage.category = "homepage";
210
210
  return { domain, protocol, homepage, llmsTxt, robotsTxt, faqPage, sitemapXml, rssFeed, aiTxt, redirectedTo: null, parkedReason: null, blogSample };
211
211
  }
212
+ function stripScripts(html) {
213
+ return html.replace(/<script(?![^>]*type\s*=\s*["']application\/ld\+json["'])[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
214
+ }
212
215
  function getCombinedHtml(data) {
213
- const parts = [data.homepage?.text || ""];
216
+ const parts = [stripScripts(data.homepage?.text || "")];
214
217
  if (data.blogSample) {
215
218
  for (const page of data.blogSample) {
216
- parts.push(page.text);
219
+ parts.push(stripScripts(page.text));
217
220
  }
218
221
  }
219
222
  return parts.join("\n");
220
223
  }
221
224
  function getBlogHtml(data) {
222
225
  if (!data.blogSample || data.blogSample.length === 0) return "";
223
- return data.blogSample.map((p) => p.text).join("\n");
226
+ return data.blogSample.map((p) => stripScripts(p.text)).join("\n");
224
227
  }
225
228
  function checkLlmsTxt(data) {
226
229
  const findings = [];
@@ -1418,14 +1421,17 @@ function checkQueryAnswerAlignment(data) {
1418
1421
  }
1419
1422
  let answered = 0;
1420
1423
  for (const qHeading of questionHeadings) {
1421
- const escapedHeading = qHeading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1422
- const pattern = new RegExp(escapedHeading + "[\\s\\S]{0,200}?<\\/h[23]>([\\s\\S]{0,1500}?)(?=<h[1-6]|$)", "i");
1423
- const match = pattern.exec(combinedHtml);
1424
- if (match) {
1425
- const afterContent = match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
1426
- if (afterContent.length >= 20) {
1427
- answered++;
1424
+ try {
1425
+ const escapedHeading = qHeading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1426
+ const pattern = new RegExp(escapedHeading + "[\\s\\S]{0,200}?<\\/h[23]>([\\s\\S]{0,1500}?)(?=<h[1-6]|$)", "i");
1427
+ const match = pattern.exec(combinedHtml);
1428
+ if (match) {
1429
+ const afterContent = match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
1430
+ if (afterContent.length >= 20) {
1431
+ answered++;
1432
+ }
1428
1433
  }
1434
+ } catch {
1429
1435
  }
1430
1436
  }
1431
1437
  const rate = Math.round(answered / questionHeadings.length * 100);
@@ -1904,27 +1910,32 @@ function checkTopicCoherence(data) {
1904
1910
  const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
1905
1911
  let score = 0;
1906
1912
  const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
1913
+ const dominantPageCount = Math.max(dominantTermCount, dominantBigramCount);
1914
+ const hasStrongAbsolutePresence = dominantPageCount >= 10;
1907
1915
  if (bestFocusRatio >= 0.8) {
1908
1916
  score += 7;
1909
1917
  } else if (bestFocusRatio >= 0.6) {
1910
1918
  score += 6;
1911
- } else if (bestFocusRatio >= 0.45) {
1919
+ } else if (bestFocusRatio >= 0.45 || hasStrongAbsolutePresence && bestFocusRatio >= 0.3) {
1912
1920
  score += 5;
1913
- } else if (bestFocusRatio >= 0.3) {
1914
- score += 3;
1921
+ } else if (bestFocusRatio >= 0.3 || hasStrongAbsolutePresence && bestFocusRatio >= 0.2) {
1922
+ score += 4;
1915
1923
  } else if (bestFocusRatio >= 0.15) {
1916
1924
  score += 2;
1917
1925
  } else {
1918
1926
  score += 1;
1919
1927
  }
1920
- const clusterPenaltyReduced = focusRatio >= 0.7;
1921
- if (topicClusterCount <= 3) {
1928
+ const clusterPenaltyReduced = focusRatio >= 0.7 || hasStrongAbsolutePresence;
1929
+ const scaledLow = Math.max(3, Math.floor(blogPages.length / 10));
1930
+ const scaledMid = Math.max(6, Math.floor(blogPages.length / 5));
1931
+ const scaledHigh = Math.max(10, Math.floor(blogPages.length / 3));
1932
+ if (topicClusterCount <= scaledLow) {
1922
1933
  score += 3;
1923
1934
  findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
1924
- } else if (topicClusterCount <= 6) {
1935
+ } else if (topicClusterCount <= scaledMid) {
1925
1936
  score += clusterPenaltyReduced ? 2 : 1;
1926
1937
  findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
1927
- } else if (topicClusterCount <= 10) {
1938
+ } else if (topicClusterCount <= scaledHigh) {
1928
1939
  score += clusterPenaltyReduced ? 1 : 0;
1929
1940
  if (!clusterPenaltyReduced) {
1930
1941
  findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
@@ -2066,6 +2077,12 @@ function auditSiteFromData(data) {
2066
2077
  checkContentDepth(data, topicCoherence.score)
2067
2078
  ];
2068
2079
  }
2080
+ async function auditSite(targetUrl) {
2081
+ const url = new URL(targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`);
2082
+ const domain = url.hostname.replace(/^www\./, "");
2083
+ const data = await prefetchSiteData(domain);
2084
+ return auditSiteFromData(data);
2085
+ }
2069
2086
 
2070
2087
  // src/scoring.ts
2071
2088
  var WEIGHTS = {
@@ -4906,12 +4923,14 @@ export {
4906
4923
  CRITERION_LABELS,
4907
4924
  analyzeAllPages,
4908
4925
  analyzePage,
4926
+ auditSite,
4909
4927
  auditSiteFromData,
4910
4928
  buildDetailedFindings,
4911
4929
  buildLinkGraph,
4912
4930
  buildScorecard,
4913
4931
  calculateOverallScore,
4914
4932
  compare,
4933
+ countRecentSitemapDates,
4915
4934
  crawlFullSite,
4916
4935
  detectParkedDomain,
4917
4936
  extractAllUrlsFromSitemap,