npm - aeorank - Versions diffs - 3.2.0 → 3.2.1 - Mend

aeorank 3.2.0 → 3.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +14 -10
package/dist/browser.js +90 -64
package/dist/browser.js.map +1 -1
package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
package/dist/chunk-DW7MPQ4X.js.map +1 -0
package/dist/chunk-PYV5JVTC.js +179 -0
package/dist/chunk-PYV5JVTC.js.map +1 -0
package/dist/cli.js +83 -59
package/dist/cli.js.map +1 -1
package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
package/dist/index.cjs +277 -90
package/dist/index.cjs.map +1 -1
package/dist/index.js +90 -64
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-RYV25AUV.js.map +0 -1
package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0

package/README.md CHANGED Viewed

@@ -96,8 +96,8 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
 | Visible Date Signal | 1.5% | Visible publication dates with `<time>` elements |
 | Extraction Friction | 2% | Sentence length, voice-friendly leads, jargon density |
 | Image Context for AI | 0.5% | Figure/figcaption, descriptive alt text, contextual placement |
-| Schema Coverage & Depth | 0% | Schema markup on inner pages, not just homepage |
-| Speakable Schema | 0% | SpeakableSpecification for voice assistants |
+| Schema Coverage & Depth | 0.5% | Schema markup on inner pages, not just homepage |
+| Speakable Schema | 0.5% | SpeakableSpecification for voice assistants |
 **Pillar 5: AI Discovery (~10%)** - *Whether* AI crawlers can find you:
@@ -110,7 +110,7 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
 | Content Licensing & AI Permissions | 1% | /ai.txt file, license schema for AI usage |
 | Sitemap Completeness | 1% | sitemap.xml with lastmod dates |
 | Canonical URL Strategy | 0.5% | Self-referencing canonical tags |
-| RSS/Atom Feed | 0% | RSS feed linked from homepage |
+| RSS/Atom Feed | 0.5% | RSS feed linked from homepage |
 > **Coherence Gate:** Sites with topic coherence below 6/10 are score-capped regardless of technical perfection. A scattered site with perfect robots.txt, llms.txt, and schema will score lower than a focused site with mediocre technical implementation.
 >
@@ -133,7 +133,7 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
 | 10 | Semantic HTML5 & Accessibility | 2% | Technical Foundation |
 | 11 | Content Freshness Signals | 4% | Trust & Authority |
 | 12 | Sitemap Completeness | 1% | AI Discovery |
-| 13 | RSS/Atom Feed | 0% | AI Discovery |
+| 13 | RSS/Atom Feed | 0.5% | AI Discovery |
 | 14 | Table & List Extractability | 3% | Content Structure |
 | 15 | Definition Patterns | 1.5% | Content Structure |
 | 16 | Direct Answer Paragraphs | 5% | Content Structure |
@@ -142,8 +142,8 @@ AEORank evaluates 40 criteria that determine how AI engines (ChatGPT, Claude, Pe
 | 19 | Fact & Data Density | 6% | Answer Readiness |
 | 20 | Canonical URL Strategy | 0.5% | AI Discovery |
 | 21 | Content Publishing Velocity | 2% | AI Discovery |
-| 22 | Schema Coverage & Depth | 0% | Technical Foundation |
-| 23 | Speakable Schema | 0% | Technical Foundation |
+| 22 | Schema Coverage & Depth | 0.5% | Technical Foundation |
+| 23 | Speakable Schema | 0.5% | Technical Foundation |
 | 24 | Query-Answer Alignment | 4% | Content Structure |
 | 25 | Content Cannibalization | 2% | AI Discovery |
 | 26 | Visible Date Signal | 1.5% | Technical Foundation |
@@ -586,6 +586,14 @@ console.log(result.comparison.tied);              // Criteria with equal scores
 ## Changelog
+### v3.2.1 - Security & Release Hardening
+DNS-aware fetch guards now block hostnames that resolve to private or reserved IP ranges, including sitemap sub-fetch paths and headless rendering requests. The GitHub Action is now deterministic: it runs the bundled CLI from the tagged release instead of installing `aeorank@latest` at runtime, and CI/release workflows now use SHA-pinned actions plus `npm ci`.
+### v3.2.0 - Helpful Content Criteria
+Added 4 new criteria: Helpful Purpose Alignment, First-Hand Experience Signals, Creator Transparency, and Methodology Transparency. The model now scores 40 total criteria and 25 page-level criteria while explicitly avoiding any "AI-written" detector.
 ### v3.1.1 - Duplicate Detection False-Positive Fix
 Duplicate-content detection now ignores short metadata rows like `Deadline:` and `Decision timeline:` so structured guides do not get penalized for repeated timeline labels. Shared duplicate-matching logic is now used by both page scoring and site-wide crawling.
@@ -594,10 +602,6 @@ Duplicate-content detection now ignores short metadata rows like `Deadline:` and
 2 new criteria (#35-#36): Duplicate Content Blocks (intra-page, 5%) and Cross-Page Duplicate Content (3%). Detects identical text blocks within pages and copy-pasted paragraphs across pages using shingle-based Jaccard similarity. Boilerplate filtering excludes CTAs, signups, and template content from false positives. Duplication gate caps per-page scores when severe duplication is found. CLI now shows duplicate section names inline per page.
-### v3.2.0 - Helpful Content Criteria
-Added 4 new criteria: Helpful Purpose Alignment, First-Hand Experience Signals, Creator Transparency, and Methodology Transparency. The model now scores 40 total criteria and 25 page-level criteria while explicitly avoiding any "AI-written" detector.
 ### v3.0.0 - 5-Pillar Framework & 6 New Criteria
 Scoring Engine v2: 28 → 34 criteria with 5-pillar framework (Answer Readiness, Content Structure, Trust & Authority, Technical Foundation, AI Discovery). 6 new criteria targeting citation quality, evidence packaging, and extraction friction. Per-pillar sub-scores, top-3 fixes, client-friendly names. Single-page score cap at 75.

package/dist/browser.js CHANGED Viewed

@@ -2,8 +2,12 @@ import {
   crawlFullSite,
   extractAllUrlsFromSitemap,
   extractInternalLinks,
-  inferCategory
-} from "./chunk-RYV25AUV.js";
+  inferCategory,
+  isSafeFetchTarget,
+  isSafePublicUrl,
+  normalizeHostname,
+  safeFetch
+} from "./chunk-DW7MPQ4X.js";
 // src/parked-domain.ts
 var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
@@ -317,18 +321,11 @@ function scoreMethodologyTransparency(html, url) {
 }
 // src/site-crawler.ts
-async function fetchText(url) {
-  try {
-    const res = await fetch(url, {
-      signal: AbortSignal.timeout(15e3),
-      headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
-      redirect: "follow"
-    });
-    const text = await res.text();
-    return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
-  } catch {
-    return null;
-  }
+async function fetchText(url, expectedDomain) {
+  const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
+  if (!res) return null;
+  const text = await res.text();
+  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
 }
 function extractDomain(url) {
   return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
@@ -369,13 +366,16 @@ function isHtmlResponse(result) {
   return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
 }
 async function prefetchSiteData(domain) {
+  if (!await isSafeFetchTarget(`https://${domain}`)) {
+    return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
+  }
   let protocol = null;
   let homepage = null;
-  homepage = await fetchText(`https://${domain}`);
+  homepage = await fetchText(`https://${domain}`, domain);
   if (homepage && homepage.status >= 200 && homepage.status < 400) {
     protocol = "https";
   } else {
-    homepage = await fetchText(`http://${domain}`);
+    homepage = await fetchText(`http://${domain}`, domain);
     if (homepage && homepage.status >= 200 && homepage.status < 400) {
       protocol = "http";
     }
@@ -395,38 +395,38 @@ async function prefetchSiteData(domain) {
   }
   const baseUrl = `${protocol}://${domain}`;
   const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
-    fetchText(`${baseUrl}/llms.txt`),
-    fetchText(`${baseUrl}/robots.txt`),
-    fetchText(`${baseUrl}/faq`).then(async (result) => {
+    fetchText(`${baseUrl}/llms.txt`, domain),
+    fetchText(`${baseUrl}/robots.txt`, domain),
+    fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
       if (result && result.status === 200) return result;
       for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
-        const fallback = await fetchText(`${baseUrl}${path}`);
+        const fallback = await fetchText(`${baseUrl}${path}`, domain);
         if (fallback && fallback.status === 200) return fallback;
       }
       return result;
     }),
-    fetchText(`${baseUrl}/sitemap.xml`),
-    fetchText(`${baseUrl}/ai.txt`)
+    fetchText(`${baseUrl}/sitemap.xml`, domain),
+    fetchText(`${baseUrl}/ai.txt`, domain)
   ]);
   let rssFeed = null;
   if (homepage) {
     const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
     if (rssLinkMatch) {
       const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
-      rssFeed = await fetchText(rssUrl);
+      rssFeed = await fetchText(rssUrl, domain);
     }
     if (!rssFeed || rssFeed.status !== 200) {
       for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
-        rssFeed = await fetchText(`${baseUrl}${path}`);
+        rssFeed = await fetchText(`${baseUrl}${path}`, domain);
         if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
         rssFeed = null;
       }
     }
   }
   if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
-    const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
+    const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
     if (subUrls.length > 0) {
-      const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
+      const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
       for (const sub of subResults) {
         if (sub && sub.status === 200) {
           sitemapXml.text += "\n" + sub.text;
@@ -439,7 +439,7 @@ async function prefetchSiteData(domain) {
     const sitemapForBlog = sitemapXml.text;
     const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
     if (blogUrls.length > 0) {
-      const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
+      const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
       blogSample = fetched.filter(
         (r) => r !== null && r.status === 200 && r.text.length > 500
       );
@@ -1490,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
   });
   return candidates.slice(0, limit).map((c) => c.url);
 }
-function extractAllSubSitemapUrls(sitemapText, limit = 5) {
+function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
   if (!sitemapText.includes("<sitemapindex")) return [];
+  const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
+  const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
   const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
   const urls = sitemapLocs.map((block) => {
     const match = block.match(/<loc>([^<]+)<\/loc>/i);
     return match ? match[1].trim() : "";
-  }).filter(Boolean);
+  }).filter((url) => !!url && isSafePublicUrl(url, domain));
   const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
   const rest = urls.filter((u) => !preferred.includes(u));
   return [...preferred, ...rest].slice(0, limit);
@@ -3143,7 +3145,11 @@ function auditSiteFromData(data) {
   ];
 }
 async function auditSite(targetUrl) {
-  const url = new URL(targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`);
+  const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
+  if (!await isSafeFetchTarget(normalizedTarget)) {
+    throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
+  }
+  const url = new URL(normalizedTarget);
   const domain = url.hostname.replace(/^www\./, "");
   const data = await prefetchSiteData(domain);
   return auditSiteFromData(data);
@@ -3208,9 +3214,9 @@ var WEIGHTS = {
   content_licensing: 0.01,
   sitemap_completeness: 0.01,
   canonical_url: 5e-3,
-  rss_feed: 0,
-  schema_coverage: 0,
-  speakable_schema: 0,
+  rss_feed: 5e-3,
+  schema_coverage: 5e-3,
+  speakable_schema: 5e-3,
   // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
   // Citation quality, evidence packaging, and extraction friction.
   citation_ready_writing: 0.04,
@@ -3375,8 +3381,8 @@ var PILLAR_WEIGHTS = {
   visible_date_signal: 0.015,
   extraction_friction: 0.02,
   image_context_ai: 5e-3,
-  schema_coverage: 0,
-  speakable_schema: 0,
+  schema_coverage: 5e-3,
+  speakable_schema: 5e-3,
   content_cannibalization: 0.02,
   llms_txt: 0.01,
   robots_txt: 0.01,
@@ -3384,7 +3390,7 @@ var PILLAR_WEIGHTS = {
   content_licensing: 0.01,
   canonical_url: 5e-3,
   sitemap_completeness: 0.01,
-  rss_feed: 0
+  rss_feed: 5e-3
 };
 var CRITERION_EFFORT = {
   topic_coherence: "High",
@@ -3683,9 +3689,9 @@ var CRITERION_WEIGHTS = {
   content_licensing: 0.01,
   sitemap_completeness: 0.01,
   canonical_url: 5e-3,
-  rss_feed: 0,
-  schema_coverage: 0,
-  speakable_schema: 0,
+  rss_feed: 5e-3,
+  schema_coverage: 5e-3,
+  speakable_schema: 5e-3,
   // V2 Criteria (~15%)
   citation_ready_writing: 0.04,
   answer_first_placement: 0.03,
@@ -4067,20 +4073,12 @@ function formatList(items) {
 }
 // src/multi-page-fetcher.ts
-async function fetchPage(url, timeoutMs = 1e4) {
-  try {
-    const res = await fetch(url, {
-      signal: AbortSignal.timeout(timeoutMs),
-      headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
-      redirect: "follow"
-    });
-    if (res.status !== 200) return null;
-    const text = await res.text();
-    if (text.length < 200) return null;
-    return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
-  } catch {
-    return null;
-  }
+async function fetchPage(url, domain, timeoutMs = 1e4) {
+  const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
+  if (!res || res.status !== 200) return null;
+  const text = await res.text();
+  if (text.length < 200) return null;
+  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
 }
 var PAGE_VARIANTS = {
   about: ["/about", "/about-us", "/company", "/who-we-are"],
@@ -4236,7 +4234,7 @@ async function fetchMultiPageData(siteData, options) {
   }
   const entries = Array.from(urlsToFetch.entries());
   if (entries.length === 0) return 0;
-  const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
+  const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
   if (!siteData.blogSample) siteData.blogSample = [];
   let added = 0;
   for (let i = 0; i < results.length; i++) {
@@ -5363,9 +5361,9 @@ var CRITERION_WEIGHTS2 = {
   content_licensing: 0.01,
   sitemap_completeness: 0.01,
   canonical_url: 5e-3,
-  rss_feed: 0,
-  schema_coverage: 0,
-  speakable_schema: 0,
+  rss_feed: 5e-3,
+  schema_coverage: 5e-3,
+  speakable_schema: 5e-3,
   // V2 Criteria (~15%)
   citation_ready_writing: 0.04,
   answer_first_placement: 0.03,
@@ -6823,6 +6821,13 @@ function isSpaShell(html) {
   return SPA_INDICATORS.some((pattern) => pattern.test(html));
 }
 async function fetchWithHeadless(url, options) {
+  let expectedDomain;
+  try {
+    expectedDomain = normalizeHostname(new URL(url).hostname);
+  } catch {
+    return null;
+  }
+  if (!await isSafeFetchTarget(url, expectedDomain)) return null;
   let puppeteer;
   try {
     const mod = "puppeteer";
@@ -6849,12 +6854,28 @@ async function fetchWithHeadless(url, options) {
     const page = await browser.newPage();
     await page.setRequestInterception(true);
     page.on("request", (req) => {
-      const type = req.resourceType();
-      if (["image", "font", "media", "stylesheet"].includes(type)) {
-        req.abort();
-      } else {
-        req.continue();
-      }
+      void (async () => {
+        const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
+        if (alreadyHandled) return;
+        if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
+          try {
+            if (!req.isInterceptResolutionHandled?.()) await req.abort();
+          } catch {
+          }
+          return;
+        }
+        const type = req.resourceType();
+        try {
+          if (!req.isInterceptResolutionHandled?.()) {
+            if (["image", "font", "media", "stylesheet"].includes(type)) {
+              await req.abort();
+            } else {
+              await req.continue();
+            }
+          }
+        } catch {
+        }
+      })();
     });
     await page.setUserAgent("AEO-Visibility-Bot/1.0");
     await page.goto(url, { waitUntil: "networkidle2", timeout });
@@ -6867,6 +6888,7 @@ async function fetchWithHeadless(url, options) {
     }
     const html = await page.content();
     const finalUrl = page.url();
+    if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
     return {
       text: html.slice(0, 5e5),
       status: 200,
@@ -6889,6 +6911,10 @@ function getTextLength(html) {
   return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
 }
 async function audit(domain, options) {
+  const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
+  if (!await isSafeFetchTarget(normalizedTarget)) {
+    throw new Error(`Refusing to audit private or local address: ${domain}`);
+  }
   const startTime = Date.now();
   let renderedWithHeadless = false;
   const siteData = await prefetchSiteData(domain);
@@ -6921,7 +6947,7 @@ async function audit(domain, options) {
     }
   }
   if (options?.fullCrawl) {
-    const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-TQ35TB2X.js");
+    const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-HAF2X2X3.js");
     const crawlResult = await crawlFullSite2(siteData, {
       maxPages: options.maxPages ?? 200,
       concurrency: options.concurrency ?? 5