npm - aeorank - Versions diffs - 2.2.0 → 2.3.1 - Mend

aeorank 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # AEORank
-Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys required.
+Score any website for AI engine visibility across 28 criteria. Pure HTTP + regex - zero API keys, under 10 seconds.
 [![npm version](https://img.shields.io/npm/v/aeorank.svg)](https://www.npmjs.com/package/aeorank)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -309,9 +309,21 @@ npm install puppeteer
 Use `--no-headless` to skip SPA rendering (faster but may produce lower scores for SPAs).
+## Page Discovery
+AEORank automatically discovers and scores pages beyond just the homepage:
+1. **Sitemap blog sample** - Up to 50 blog/article pages from `sitemap.xml`
+2. **Nav link extraction** - Internal links from `<nav>` elements
+3. **Common page variants** - `/about`, `/pricing`, `/services`, `/contact`, `/team`, `/resources`, `/docs`, `/case-studies`
+4. **Sitemap content pages** - 6 non-blog pages from sitemap (service pages, product pages)
+5. **Homepage link fallback** (v2.2+) - When no sitemap exists (or fewer than 4 blog pages found), extracts up to 30 internal links from the full homepage HTML to build a page list automatically
+This ensures realistic scoring even for sites without a sitemap. Before the fallback existed, such sites had only 1-5 pages checked, which inflated their scores.
 ## Full-Site Crawl
-By default, AEORank audits the homepage plus up to 50 blog pages from the sitemap. For deeper analysis, enable `--full-crawl` to BFS-crawl every discoverable page:
+For even deeper analysis beyond the automatic page discovery, enable `--full-crawl` to BFS-crawl every discoverable page:
 ```bash
 npx aeorank example.com --full-crawl                    # Up to 200 pages
@@ -518,6 +530,34 @@ console.log(result.comparison.siteBAdvantages);   // Criteria where B leads
 console.log(result.comparison.tied);              // Criteria with equal scores
 ```
+## Changelog
+### v2.3.0 - Coherence Scaling & Script Stripping
+- **Topic coherence scales with page count**: Sites with many pages (50+) are no longer penalized for having more topic clusters. Cluster thresholds scale proportionally (pages/10, pages/5, pages/3), never dropping below the previous fixed limits of 3, 6, and 10. A dominant term that appears on 10+ pages also boosts the focus score.
+- **Strip inline JavaScript from scoring**: `<script>` and `<style>` tags are now removed before text analysis, preventing WP Rocket and similar deferred-loading scripts from corrupting regex-based scoring.
+- **Regex safety net**: `checkQueryAnswerAlignment` wraps `new RegExp()` in try-catch to handle residual script content gracefully.
+### v2.2.0 - Auto Page Discovery
+Sites without a `sitemap.xml` now get up to 30 pages discovered from homepage links instead of 1-5. Prevents inflated scores from insufficient page coverage.
+### v2.1.0 - Scoring Rebalance with Coherence Gate
+Weight distribution redesigned: Content Substance ~55%, Organization ~30%, Plumbing ~15%. Coherence gate caps scores when topic focus is below 6/10.
+### v2.0.0 - Topic Coherence & Content Depth
+Added 2 new criteria (26 -> 28): Topic Coherence (14%) and Content Depth (7%). Blog sampling for coherence analysis.
+### v1.6.0 - Link Graph & Fix Plan Engine
+Internal linking analysis with orphan/pillar/hub detection, topic clusters. Phased fix plan generation with code examples.
+### v1.5.0 - Per-Page Scoring
+Individual page scores (0-100) against 14 page-level criteria. Top/bottom page rankings.
 ## Benchmark Dataset
 The `data/` directory contains the largest open dataset of AI visibility scores - **13,619 domains** scored across 28 criteria, including **4,328 Y Combinator startups** across 48 batches (W06-W26):

package/dist/browser.d.ts CHANGED Viewed

@@ -262,12 +262,25 @@ interface RawDataSummary {
  * Single entry point for all HTTP requests - no redundant fetches.
  */
 declare function prefetchSiteData(domain: string): Promise<SiteData>;
+interface SitemapDateAnalysis {
+    recentCount: number;
+    isUniform: boolean;
+    uniformDetail?: string;
+    totalWithDates: number;
+    distinctRecentDays: number;
+}
+declare function countRecentSitemapDates(sitemapText: string): SitemapDateAnalysis;
 declare function extractRawDataSummary(data: SiteData): RawDataSummary;
 /**
  * Run all 28 criteria checks using pre-fetched site data.
  * All functions are synchronous (no HTTP calls) - data was already fetched.
  */
 declare function auditSiteFromData(data: SiteData): CriterionResult[];
+/**
+ * Legacy entry point: fetches data and runs all checks.
+ * Used by analyzer.ts for the /api/aeo/analyze endpoint.
+ */
+declare function auditSite(targetUrl: string): Promise<CriterionResult[]>;
 declare function calculateOverallScore(criteria: CriterionResult[]): number;
@@ -521,4 +534,4 @@ interface ComparisonResult {
  */
 declare function compare(domainA: string, domainB: string, options?: AuditOptions): Promise<ComparisonResult>;
-export { type AuditData, type AuditFinding, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type FixAction, type FixPhase, type FixPlan, type FixPlanSummary, type ImpactLevel, type LinkEdge, type LinkGraph, type LinkGraphStats, type PageCategory$1 as PageCategory, type PageCriterionScore$1 as PageCriterionScore, type PageIssue, type PageNode, type PageReview, type PageScoreResult, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type ScoreCardItem, type SerializedLinkGraph, type Severity, type SiteData, type Status, type TopicCluster, analyzeAllPages, analyzePage, auditSiteFromData, buildDetailedFindings, buildLinkGraph, buildScorecard, calculateOverallScore, compare, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractLinksWithAnchors, extractNavLinks, extractRawDataSummary, fetchMultiPageData, generateBottomLine, generateFixPlan, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, prefetchSiteData, scoreAllPages, scorePage, scoreToStatus, serializeLinkGraph };
+export { type AuditData, type AuditFinding, type AuditStatus, CRITERION_LABELS, type ComparisonResult, type CrawlOptions, type CrawlResult, type CriterionComparison, type CriterionDetail, type CriterionResult, type Deliverable, type DetailedFinding, type FetchResult, type FindingSeverity, type FindingType, type FixAction, type FixPhase, type FixPlan, type FixPlanSummary, type ImpactLevel, type LinkEdge, type LinkGraph, type LinkGraphStats, type PageCategory$1 as PageCategory, type PageCriterionScore$1 as PageCriterionScore, type PageIssue, type PageNode, type PageReview, type PageScoreResult, type ParkedDomainResult, type PitchMetric, type Priority, type RawDataSummary, type ScoreCardItem, type SerializedLinkGraph, type Severity, type SiteData, type SitemapDateAnalysis, type Status, type TopicCluster, analyzeAllPages, analyzePage, auditSite, auditSiteFromData, buildDetailedFindings, buildLinkGraph, buildScorecard, calculateOverallScore, compare, countRecentSitemapDates, crawlFullSite, detectParkedDomain, extractAllUrlsFromSitemap, extractContentPagesFromSitemap, extractInternalLinks, extractLinksWithAnchors, extractNavLinks, extractRawDataSummary, fetchMultiPageData, generateBottomLine, generateFixPlan, generateOpportunities, generatePitchNumbers, generateVerdict, inferCategory, prefetchSiteData, scoreAllPages, scorePage, scoreToStatus, serializeLinkGraph };

package/dist/browser.js CHANGED Viewed

@@ -209,18 +209,21 @@ async function prefetchSiteData(domain) {
   if (homepage) homepage.category = "homepage";
   return { domain, protocol, homepage, llmsTxt, robotsTxt, faqPage, sitemapXml, rssFeed, aiTxt, redirectedTo: null, parkedReason: null, blogSample };
 }
+function stripScripts(html) {
+  return html.replace(/<script(?![^>]*type\s*=\s*["']application\/ld\+json["'])[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
+}
 function getCombinedHtml(data) {
-  const parts = [data.homepage?.text || ""];
+  const parts = [stripScripts(data.homepage?.text || "")];
   if (data.blogSample) {
     for (const page of data.blogSample) {
-      parts.push(page.text);
+      parts.push(stripScripts(page.text));
     }
   }
   return parts.join("\n");
 }
 function getBlogHtml(data) {
   if (!data.blogSample || data.blogSample.length === 0) return "";
-  return data.blogSample.map((p) => p.text).join("\n");
+  return data.blogSample.map((p) => stripScripts(p.text)).join("\n");
 }
 function checkLlmsTxt(data) {
   const findings = [];
@@ -1418,14 +1421,17 @@ function checkQueryAnswerAlignment(data) {
   }
   let answered = 0;
   for (const qHeading of questionHeadings) {
-    const escapedHeading = qHeading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-    const pattern = new RegExp(escapedHeading + "[\\s\\S]{0,200}?<\\/h[23]>([\\s\\S]{0,1500}?)(?=<h[1-6]|$)", "i");
-    const match = pattern.exec(combinedHtml);
-    if (match) {
-      const afterContent = match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
-      if (afterContent.length >= 20) {
-        answered++;
+    try {
+      const escapedHeading = qHeading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+      const pattern = new RegExp(escapedHeading + "[\\s\\S]{0,200}?<\\/h[23]>([\\s\\S]{0,1500}?)(?=<h[1-6]|$)", "i");
+      const match = pattern.exec(combinedHtml);
+      if (match) {
+        const afterContent = match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
+        if (afterContent.length >= 20) {
+          answered++;
+        }
       }
+    } catch {
     }
   }
   const rate = Math.round(answered / questionHeadings.length * 100);
@@ -1904,27 +1910,32 @@ function checkTopicCoherence(data) {
   const bigramFocusRatio = blogPages.length > 0 ? dominantBigramCount / blogPages.length : 0;
   let score = 0;
   const bestFocusRatio = Math.max(focusRatio, bigramFocusRatio);
+  const dominantPageCount = Math.max(dominantTermCount, dominantBigramCount);
+  const hasStrongAbsolutePresence = dominantPageCount >= 10;
   if (bestFocusRatio >= 0.8) {
     score += 7;
   } else if (bestFocusRatio >= 0.6) {
     score += 6;
-  } else if (bestFocusRatio >= 0.45) {
+  } else if (bestFocusRatio >= 0.45 || hasStrongAbsolutePresence && bestFocusRatio >= 0.3) {
     score += 5;
-  } else if (bestFocusRatio >= 0.3) {
-    score += 3;
+  } else if (bestFocusRatio >= 0.3 || hasStrongAbsolutePresence && bestFocusRatio >= 0.2) {
+    score += 4;
   } else if (bestFocusRatio >= 0.15) {
     score += 2;
   } else {
     score += 1;
   }
-  const clusterPenaltyReduced = focusRatio >= 0.7;
-  if (topicClusterCount <= 3) {
+  const clusterPenaltyReduced = focusRatio >= 0.7 || hasStrongAbsolutePresence;
+  const scaledLow = Math.max(3, Math.floor(blogPages.length / 10));
+  const scaledMid = Math.max(6, Math.floor(blogPages.length / 5));
+  const scaledHigh = Math.max(10, Math.floor(blogPages.length / 3));
+  if (topicClusterCount <= scaledLow) {
     score += 3;
     findings.push({ severity: "info", detail: `${topicClusterCount} topic cluster(s) - tightly focused content` });
-  } else if (topicClusterCount <= 6) {
+  } else if (topicClusterCount <= scaledMid) {
     score += clusterPenaltyReduced ? 2 : 1;
     findings.push({ severity: "info", detail: `${topicClusterCount} topic clusters${clusterPenaltyReduced ? " within a focused niche" : " - moderately focused"}` });
-  } else if (topicClusterCount <= 10) {
+  } else if (topicClusterCount <= scaledHigh) {
     score += clusterPenaltyReduced ? 1 : 0;
     if (!clusterPenaltyReduced) {
       findings.push({ severity: "low", detail: `${topicClusterCount} topic clusters - scattered content`, fix: "Reduce the number of distinct topics. Focus blog content on 2-3 core expertise areas." });
@@ -2066,6 +2077,12 @@ function auditSiteFromData(data) {
     checkContentDepth(data, topicCoherence.score)
   ];
 }
+async function auditSite(targetUrl) {
+  const url = new URL(targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`);
+  const domain = url.hostname.replace(/^www\./, "");
+  const data = await prefetchSiteData(domain);
+  return auditSiteFromData(data);
+}
 // src/scoring.ts
 var WEIGHTS = {
@@ -4906,12 +4923,14 @@ export {
   CRITERION_LABELS,
   analyzeAllPages,
   analyzePage,
+  auditSite,
   auditSiteFromData,
   buildDetailedFindings,
   buildLinkGraph,
   buildScorecard,
   calculateOverallScore,
   compare,
+  countRecentSitemapDates,
   crawlFullSite,
   detectParkedDomain,
   extractAllUrlsFromSitemap,