npm - aiseo-audit - Versions diffs - 1.4.2 → 1.4.4 - Mend

aiseo-audit 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md CHANGED

@@ -5,7 +5,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-7EB6D7.svg)](https://opensource.org/licenses/MIT)
 [![Node.js](https://img.shields.io/badge/node-%3E%3D20-7EB6D7.svg)](https://nodejs.org)
 [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-7EB6D7?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
-[![Tests](https://img.shields.io/badge/tests-259%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
+[![Tests](https://img.shields.io/badge/tests-293%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
 [![Coverage](https://img.shields.io/codecov/c/github/agencyenterprise/aiseo-audit?color=8FBC8F&label=coverage)](https://codecov.io/gh/agencyenterprise/aiseo-audit)
 <div align="center">
@@ -15,12 +15,10 @@
 Deterministic CLI that audits web pages for **AI search readiness**. Think Lighthouse, but for how well AI engines can fetch, extract, understand, and cite your content.
-> [!TIP]
-> Run `aiseo-audit https://www.aiseo-audit.com` to see a 100/100 [A Score](https://www.aiseo-audit.com/).
 **AI SEO measures how reusable your content is for generative engines, not traditional search rankings.**
 - [Quick Start](#quick-start)
+- [CI/CD](#cicd)
 - [CLI Options](#cli-options)
 - [Site-Wide Auditing](#site-wide-auditing)
 - [Local Development](#local-development)
@@ -51,6 +49,11 @@ aiseo-audit goes deeper:
 ## Quick Start
+```bash
+# Try it instantly — no install required
+npx aiseo-audit https://yoursite.com
+```
 ```bash
 # As a project dependency
 npm install aiseo-audit
@@ -95,6 +98,25 @@ aiseo-audit https://example.com --user-agent "MyBot/1.0"
 aiseo-audit https://example.com --config aiseo.config.json
 ```
+## CI/CD
+Drop this into any GitHub Actions workflow to gate PRs on AI search readiness:
+```yaml
+# .github/workflows/aiseo-audit.yml
+name: AI SEO Audit
+on:
+  pull_request:
+  push:
+    branches: [main]
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    steps:
+      - run: npx aiseo-audit https://yoursite.com --fail-under 70
+```
 ## CLI Options
 | Option                 | Description                                                                 | Default                                       |
@@ -155,23 +177,6 @@ aiseo-audit --sitemap https://example.com/projects/sitemap.xml --signals-base ht
 Every report format explicitly shows which URL domain signals were fetched from, so there is no guesswork about where `robots.txt`, `llms.txt`, and `llms-full.txt` were checked.
-## CI/CD
-```yaml
-# .github/workflows/aiseo-audit.yml
-name: AI SEO Audit
-on:
-  pull_request:
-  push:
-    branches: [main]
-jobs:
-  audit:
-    runs-on: ubuntu-latest
-    steps:
-      - run: npx aiseo-audit https://yoursite.com --fail-under 70
-```
 ## User Agent
 By default, all HTTP requests (page fetch, `robots.txt`, `llms.txt`) are sent with the header `User-Agent: AISEOAudit/<version>`. This is intentional. If a site blocks unknown bots, that is a meaningful negative signal for AI search readiness, and the audit should surface it as a failing "Fetch Success" score.

package/dist/cli.js CHANGED

@@ -28,7 +28,7 @@ var import_zod3 = require("zod");
 // src/modules/analyzer/constants.ts
 var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
-var VERSION = true ? "1.4.2" : "0.0.0";
+var VERSION = true ? "1.4.4" : "0.0.0";
 // src/modules/fetcher/constants.ts
 var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
@@ -40,6 +40,56 @@ var DEFAULT_HEADERS = {
 };
 // src/utils/http.ts
+/**
+ * Error type thrown by the HTTP helpers. Besides the message it records a
+ * machine-readable failure category and the URL that was being requested.
+ */
+var FetchError = class extends Error {
+  /** Failure category, e.g. "TIMEOUT", "DNS_FAILURE", "TLS_ERROR" or "TOO_LARGE". */
+  code;
+  /** URL of the request that failed. */
+  url;
+  /**
+   * @param {string} code - Failure category.
+   * @param {string} url - URL of the failed request.
+   * @param {string} message - Human-readable description.
+   * @param {{ cause?: unknown }} [options] - Optional; forwarded to `Error`
+   *   so the underlying failure can be chained through `cause`.
+   */
+  constructor(code, url, message, options) {
+    super(message, options);
+    this.name = "FetchError";
+    this.code = code;
+    this.url = url;
+  }
+};
+/**
+ * Normalizes anything thrown during a request into a FetchError.
+ *
+ * An existing FetchError is returned untouched. Otherwise the error message,
+ * the message of `err.cause`, and any string `code` found on either object
+ * are lower-cased and scanned for keywords, in this order: TIMEOUT,
+ * DNS_FAILURE, CONNECTION_REFUSED, TLS_ERROR, with NETWORK_ERROR as fallback.
+ * The thrown value is kept on the result as a non-enumerable `cause`.
+ *
+ * @param {unknown} err - Value caught from the failed request.
+ * @param {string} url - URL that was being requested.
+ * @returns {FetchError} The classified error.
+ */
+function classifyFetchError(err, url) {
+  if (err instanceof FetchError) return err;
+  const msg = err instanceof Error ? err.message : String(err);
+  const cause = err instanceof Error && err.cause instanceof Error ? err.cause : void 0;
+  // Some failures carry their errno (ENOTFOUND, ECONNREFUSED, CERT_*) only in
+  // `code`, with an empty message, so codes are scanned alongside messages.
+  const combined = [msg, cause?.message, err?.code, cause?.code]
+    .filter((part) => typeof part === "string")
+    .join(" ")
+    .toLowerCase();
+  // An unparsable URL must not make the classifier throw and hide the real
+  // failure; fall back to echoing the raw value in the message.
+  let hostname;
+  try {
+    hostname = new URL(url).hostname;
+  } catch {
+    hostname = String(url);
+  }
+  const wrap = (code, message) => {
+    const error = new FetchError(code, url, message);
+    // Mirror how `new Error(msg, { cause })` stores the cause.
+    Object.defineProperty(error, "cause", {
+      value: err,
+      writable: true,
+      configurable: true,
+      enumerable: false
+    });
+    return error;
+  };
+  if (err instanceof DOMException || err instanceof Error && err.name === "AbortError" || combined.includes("abort")) {
+    return wrap(
+      "TIMEOUT",
+      `Request timed out. The server at "${hostname}" did not respond in time.`
+    );
+  }
+  if (combined.includes("getaddrinfo") || combined.includes("enotfound")) {
+    return wrap(
+      "DNS_FAILURE",
+      `DNS lookup failed for "${hostname}". Check that the domain exists and is spelled correctly.`
+    );
+  }
+  if (combined.includes("econnrefused")) {
+    return wrap(
+      "CONNECTION_REFUSED",
+      `Connection refused by "${hostname}". The server may be down or not accepting connections.`
+    );
+  }
+  if (combined.includes("cert") || combined.includes("ssl") || combined.includes("tls") || combined.includes("unable to verify")) {
+    return wrap(
+      "TLS_ERROR",
+      `TLS/SSL error connecting to "${hostname}". The site may have an invalid or expired certificate.`
+    );
+  }
+  return wrap("NETWORK_ERROR", `Network error fetching "${url}": ${msg}`);
+}
 async function httpGet(options) {
   const controller = new AbortController();
   const timer = setTimeout(() => controller.abort(), options.timeout);
@@ -55,14 +105,18 @@ async function httpGet(options) {
     });
     const contentLength = response.headers.get("content-length");
     if (contentLength && parseInt(contentLength, 10) > MAX_RESPONSE_SIZE) {
-      throw new Error(
-        `Response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`
+      throw new FetchError(
+        "TOO_LARGE",
+        options.url,
+        `Response from "${new URL(options.url).hostname}" exceeds the ${Math.round(MAX_RESPONSE_SIZE / 1024 / 1024)}MB size limit.`
       );
     }
     const data = await response.text();
     if (data.length > MAX_RESPONSE_SIZE) {
-      throw new Error(
-        `Response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`
+      throw new FetchError(
+        "TOO_LARGE",
+        options.url,
+        `Response from "${new URL(options.url).hostname}" exceeds the ${Math.round(MAX_RESPONSE_SIZE / 1024 / 1024)}MB size limit.`
       );
     }
     const headers = {};
@@ -75,6 +129,8 @@ async function httpGet(options) {
       headers,
       finalUrl: response.url
     };
+  } catch (err) {
+    throw classifyFetchError(err, options.url);
   } finally {
     clearTimeout(timer);
   }
@@ -102,6 +158,8 @@ async function httpHead(options) {
       headers,
       finalUrl: response.url
     };
+  } catch (err) {
+    throw classifyFetchError(err, options.url);
   } finally {
     clearTimeout(timer);
   }
@@ -350,23 +408,407 @@ function countSyllables(word) {
 }
 // src/modules/audits/support/nlp.ts
+// Lower-case function words and URL fragments that are excluded when topic
+// terms are counted. Grouped loosely by role; membership is what matters.
+var STOPWORDS = /* @__PURE__ */ new Set([
+  // articles, conjunctions, prepositions
+  "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "as",
+  // auxiliaries and modals
+  "is", "was", "are", "were", "been", "be", "have", "has", "had", "do", "does", "did",
+  "will", "would", "could", "should", "may", "might", "shall", "can", "need", "must",
+  // determiners and pronouns
+  "that", "which", "who", "whom", "this", "these", "those",
+  "it", "its", "he", "she", "they", "we", "you", "i", "me", "him", "her", "us", "them", "my", "your", "his", "our", "their",
+  "what", "when", "where", "how", "why",
+  // quantifiers and intensifiers
+  "all", "each", "every", "both", "few", "more", "most", "other", "some", "such",
+  "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "just",
+  // adverbs and further prepositions
+  "about", "above", "after", "again", "also", "any", "because", "before", "between", "during",
+  "here", "if", "into", "like", "new", "now", "over", "then", "there", "through", "under",
+  "up", "out", "off", "down", "much", "well", "back", "even", "still",
+  // very common verbs, numerals and adjectives
+  "get", "got", "one", "two", "make", "many", "say", "said", "see", "go", "come", "take", "know", "think",
+  "good", "great", "first", "last", "long", "way", "find", "use", "used", "using", "while", "being", "made",
+  "however", "since", "per", "via", "based", "within", "without", "across", "along", "around", "among", "until", "another",
+  // URL fragments
+  "www", "http", "https", "com"
+]);
+// Upper-case tokens that should not be reported as acronym entities.
+// Lookups are exact and case-sensitive.
+var ACRONYM_STOPLIST = /* @__PURE__ */ new Set([
+  // everyday abbreviations
+  "I", "A", "OK", "AM", "PM", "US", "UK", "EU", "VS", "EG", "IE", "ET", "AL",
+  // web and software
+  "HTML", "CSS", "JS", "TS", "URL", "HTTP", "HTTPS", "API", "SDK", "CLI", "GUI",
+  // formats and protocols
+  "PDF", "CSV", "JSON", "XML", "SQL", "RSS", "FTP", "SSH", "SSL", "TLS", "DNS", "TCP", "UDP", "IP",
+  // hardware
+  "RAM", "ROM", "CPU", "GPU", "SSD", "HDD", "USB", "HDMI",
+  // business shorthand
+  "FAQ", "DIY", "ASAP", "FYI", "TBD", "TBA", "ETA", "ROI", "KPI",
+  // roles and departments
+  "CEO", "CTO", "CFO", "COO", "CIO", "VP", "SVP", "EVP", "HR", "PR", "QA", "IT", "RD", "RND",
+  // company forms
+  "LLC", "INC", "LTD", "CORP", "PLC",
+  // currencies
+  "USD", "EUR", "GBP", "JPY", "CAD",
+  // correspondence
+  "ID", "NO", "RE", "CC", "BCC",
+  // marketing and platforms
+  "GEO", "SEO", "SEM", "PPC", "CMS", "CRM", "ERP", "SaaS",
+  // AI and miscellaneous
+  "AI", "ML", "NLP", "LLM", "GPT", "NER", "TLDR", "AKA", "RSVP", "PS"
+]);
+// Organization keywords. Case-insensitive and not anchored to the end of the
+// string: it matches when any listed word occurs as a whole word anywhere in
+// the tested text, not only as a trailing suffix.
+var ORG_SUFFIXES = /\b(?:Inc|Corp|Corporation|LLC|Ltd|Limited|Co|Company|Group|Foundation|Institute|University|Association|Society|Agency|Authority|Bureau|Commission|Council|Department|Board|Trust|Fund|Partners|Ventures|Labs|Technologies|Solutions|Systems|Services|Consulting|Media|Network|Studios|Entertainment|Healthcare|Pharmaceuticals|Dynamics|Holdings|Capital|Enterprises|International)\b/i;
+// Personal titles: a listed title starting at a word boundary, followed by a
+// literal period and optional whitespace. This literal has no flags, so it is
+// case-sensitive on its own.
+// NOTE(review): the period is mandatory for every alternative, so spelled-out
+// titles ("Professor", "Senator", "Judge") only match when written with a
+// trailing "." — confirm whether that is intended.
+var PERSON_HONORIFICS = /\b(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Rev|Reverend|Sen|Senator|Rep|Representative|Gov|Governor|Pres|President|Gen|General|Col|Colonel|Sgt|Sergeant|Cpl|Corporal|Pvt|Private|Adm|Admiral|Capt|Captain|Lt|Lieutenant|Maj|Major|Sir|Dame|Lord|Lady|Hon|Honorable|Judge|Justice|Chancellor|Dean|Provost)\.\s*/;
+/**
+ * Collects the distinct runs of 2-6 upper-case ASCII letters found in `text`,
+ * skipping anything in ACRONYM_STOPLIST.
+ *
+ * @param {string} text - Text to scan.
+ * @returns {string[]} Unique acronyms in order of first appearance.
+ */
+function extractAcronymEntities(text) {
+  const candidates = text.match(/\b[A-Z]{2,6}\b/g) ?? [];
+  // A Set keeps first-insertion order, which gives de-duplication for free.
+  const unique = new Set(
+    candidates.filter((token) => !ACRONYM_STOPLIST.has(token))
+  );
+  return [...unique];
+}
+/**
+ * Finds multi-word Title Case phrases in `text` and returns each distinct
+ * phrase once, in order of first match.
+ *
+ * Phrases whose first word also opens a sentence are treated with suspicion:
+ * unless the phrase is found after ". " or ", " somewhere in the text, it is
+ * only kept when it occurs at least twice.
+ *
+ * @param {string} text - Text to scan.
+ * @returns {string[]} Distinct Title Case phrases.
+ */
+function extractTitleCaseEntities(text) {
+  // One capitalized word, an optional lower-case connector (of, the, van, ...),
+  // a second capitalized word, then up to three more capitalized words. A
+  // "word" here is one ASCII capital followed by ASCII lower-case letters.
+  const pattern = /\b([A-Z][a-z]+(?:\s+(?:of|the|and|for|de|van|von|al|el|la|le|del|der|den|das|di|du))?\s+(?:[A-Z][a-z]+)(?:\s+[A-Z][a-z]+){0,3})\b/g;
+  // Record the first word of every chunk produced by splitting on ., ! or ?
+  // followed by whitespace.
+  const sentences = text.split(/[.!?]\s+/);
+  const sentenceStarts = /* @__PURE__ */ new Set();
+  for (const s of sentences) {
+    const trimmed = s.trim();
+    const firstWord = trimmed.split(/\s+/)[0];
+    if (firstWord) sentenceStarts.add(firstWord);
+  }
+  const seen = /* @__PURE__ */ new Set();
+  const results = [];
+  let match;
+  // `pattern` is created per call, so the stateful lastIndex used by exec()
+  // under the g flag never leaks between calls.
+  while ((match = pattern.exec(text)) !== null) {
+    const entity = match[1];
+    const firstWord = entity.split(/\s+/)[0];
+    // NOTE(review): a phrase found right after ". " is itself at a sentence
+    // start, yet that case skips the frequency check — confirm the intent.
+    if (sentenceStarts.has(firstWord) && !text.includes(`. ${entity}`) && !text.includes(`, ${entity}`)) {
+      // Count raw occurrences of the phrase, escaping regex metacharacters.
+      const escapedEntity = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+      const appearances = text.match(new RegExp(escapedEntity, "g"));
+      if (!appearances || appearances.length < 2) continue;
+    }
+    // The pattern already guarantees at least two words, so the length test
+    // below always holds for a match; only the `seen` check filters here.
+    if (!seen.has(entity) && entity.split(/\s+/).length >= 2) {
+      seen.add(entity);
+      results.push(entity);
+    }
+  }
+  return results;
+}
+/**
+ * Reports whether `entity` contains one of the ORG_SUFFIXES keywords.
+ *
+ * @param {string} entity - Candidate entity name.
+ * @returns {boolean} True when ORG_SUFFIXES matches the name.
+ */
+function isOrganizationByPattern(entity) {
+  const hasOrgKeyword = ORG_SUFFIXES.test(entity);
+  return hasOrgKeyword;
+}
+/**
+ * Reports whether `entity` appears in `text` directly after a personal title
+ * from PERSON_HONORIFICS. The combined match is case-insensitive.
+ *
+ * @param {string} text - Full text to search.
+ * @param {string} entity - Candidate name, matched literally.
+ * @returns {boolean} True when a title immediately precedes the name.
+ */
+function isPersonByHonorific(text, entity) {
+  // Neutralize regex metacharacters so the name is matched verbatim.
+  const literalName = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  const titleThenName = `(?:${PERSON_HONORIFICS.source})\\s*${literalName}`;
+  return new RegExp(titleThenName, "i").test(text);
+}
+/**
+ * Picks the most frequent terms in `text` as topics.
+ *
+ * Despite the name, no inverse-document-frequency term is involved: the score
+ * is the raw count, multiplied by 1.5 for two-word terms. Words of one or two
+ * characters and STOPWORDS are dropped first, and bigrams are built from the
+ * remaining words, so they may join words that were not adjacent in the
+ * source. Only terms seen at least twice are returned.
+ *
+ * @param {string} text - Text to analyze.
+ * @param {number} limit - Maximum number of topics to return.
+ * @returns {string[]} Topic terms, highest score first.
+ */
+function extractTopicsByTfIdf(text, limit) {
+  const words = text
+    .toLowerCase()
+    .replace(/[^a-z0-9\s'-]/g, " ")
+    .split(/\s+/)
+    // Apostrophes and hyphens are kept for words like "don't" or "open-source",
+    // but at the edges they are quoting/dash residue: strip them so "'seo'"
+    // counts as "seo" and a bare "---" never becomes a topic.
+    .map((w) => w.replace(/^['-]+|['-]+$/g, ""))
+    .filter((w) => w.length > 2 && !STOPWORDS.has(w));
+  if (words.length === 0) return [];
+  const freq = /* @__PURE__ */ new Map();
+  // Single words are inserted before bigrams; with a stable sort, insertion
+  // order decides ties between equal scores.
+  for (const w of words) {
+    freq.set(w, (freq.get(w) || 0) + 1);
+  }
+  for (let i = 0; i < words.length - 1; i++) {
+    const bigram = `${words[i]} ${words[i + 1]}`;
+    freq.set(bigram, (freq.get(bigram) || 0) + 1);
+  }
+  const candidates = [];
+  for (const [term, count] of freq) {
+    if (count >= 2) {
+      const isBigram = term.includes(" ");
+      const score = isBigram ? count * 1.5 : count;
+      candidates.push([term, score]);
+    }
+  }
+  candidates.sort((a, b) => b[1] - a[1]);
+  return candidates.slice(0, limit).map(([term]) => term);
+}
+/**
+ * Removes case-insensitive duplicates and names that are already covered by a
+ * longer accepted name (e.g. "New York" when "New York City" is present).
+ *
+ * A shorter name only counts as covered when it occurs inside the longer one
+ * on word boundaries, so "NASA" is not swallowed by "Nasal Spray Inc".
+ *
+ * @param {string[]} entities - Candidate names; the array is not modified.
+ * @returns {string[]} Surviving names, longest first.
+ */
+function smartDedup(entities) {
+  if (entities.length === 0) return [];
+  const isWordChar = (ch) => ch !== void 0 && /[\p{L}\p{N}]/u.test(ch);
+  const containsWholePhrase = (haystack, needle) => {
+    // An empty name is contained in everything; returning early also avoids
+    // looping forever on indexOf("").
+    if (needle === "") return true;
+    let at = haystack.indexOf(needle);
+    while (at !== -1) {
+      const before = haystack[at - 1];
+      const after = haystack[at + needle.length];
+      if (!isWordChar(before) && !isWordChar(after)) return true;
+      at = haystack.indexOf(needle, at + 1);
+    }
+    return false;
+  };
+  // Longest first, so a short name is always compared against the longer
+  // names that could contain it.
+  const sorted = [...entities].sort((a, b) => b.length - a.length);
+  const result = [];
+  const lowerSeen = /* @__PURE__ */ new Set();
+  for (const entity of sorted) {
+    const lower = entity.toLowerCase();
+    if (lowerSeen.has(lower)) continue;
+    const covered = [...lowerSeen].some(
+      (accepted) => containsWholePhrase(accepted, lower)
+    );
+    if (covered) continue;
+    result.push(entity);
+    lowerSeen.add(lower);
+  }
+  return result;
+}
+/**
+ * Concatenates two entity lists, de-duplicates them with smartDedup, and
+ * caps the result at `limit` entries.
+ *
+ * @param {string[]} compromiseList - Entities listed first.
+ * @param {string[]} supplementalList - Entities appended after them.
+ * @param {number} limit - Maximum number of entities to return.
+ * @returns {string[]} De-duplicated entities, at most `limit` long.
+ */
+function mergeEntityLists(compromiseList, supplementalList, limit) {
+  const deduped = smartDedup([...compromiseList, ...supplementalList]);
+  return deduped.slice(0, limit);
+}
 function extractEntities(text) {
   const doc = (0, import_compromise.default)(text);
-  const people = [...new Set(doc.people().out("array"))].slice(
-    0,
-    10
-  );
-  const organizations = [
+  const compromisePeople = [...new Set(doc.people().out("array"))];
+  const compromiseOrgs = [
     ...new Set(doc.organizations().out("array"))
-  ].slice(0, 10);
-  const places = [...new Set(doc.places().out("array"))].slice(
-    0,
+  ];
+  const compromisePlaces = [...new Set(doc.places().out("array"))];
+  const acronyms = extractAcronymEntities(text);
+  const titleCaseEntities = extractTitleCaseEntities(text);
+  const supplementalPeople = [];
+  const supplementalOrgs = [];
+  const unclassified = [];
+  for (const entity of titleCaseEntities) {
+    if (isPersonByHonorific(text, entity)) {
+      supplementalPeople.push(entity);
+    } else if (isOrganizationByPattern(entity)) {
+      supplementalOrgs.push(entity);
+    } else {
+      unclassified.push(entity);
+    }
+  }
+  unclassified.push(...acronyms);
+  const people = mergeEntityLists(compromisePeople, supplementalPeople, 10);
+  const organizations = mergeEntityLists(
+    compromiseOrgs,
+    [...supplementalOrgs, ...unclassified],
     10
   );
-  const topics = [...new Set(doc.topics().out("array"))].slice(
-    0,
-    15
-  );
+  const places = smartDedup([...new Set(compromisePlaces)]).slice(0, 10);
+  const topics = extractTopicsByTfIdf(text, 15);
   const imperativeVerbCount = doc.verbs().isImperative().length;
   const numberCount = doc.numbers().length;
   return {
@@ -441,11 +883,11 @@ function maxFactors(factors) {
 }
 // src/modules/audits/categories/answerability.ts
-function auditAnswerability(page) {
+function auditAnswerability(page, preExtracted) {
   const text = page.cleanText;
   const $ = page.$;
   const factors = [];
-  const { imperativeVerbCount = 0 } = extractEntities(text);
+  const { imperativeVerbCount = 0 } = preExtracted ?? extractEntities(text);
   const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
   const defScore = thresholdScore(defCount, [
     [6, 10],
@@ -1104,10 +1546,10 @@ function auditContentStructure(page) {
 }
 // src/modules/audits/categories/entity-clarity.ts
-function auditEntityClarity(page) {
+function auditEntityClarity(page, preExtracted) {
   const text = page.cleanText;
   const factors = [];
-  const entities = extractEntities(text);
+  const entities = preExtracted ?? extractEntities(text);
   const totalEntities = entities.people.length + entities.organizations.length + entities.places.length + entities.topics.length;
   const richScore = thresholdScore(totalEntities, [
     [9, 20],
@@ -1136,13 +1578,14 @@ function auditEntityClarity(page) {
     }
   }
   const consistencyRatio = keyWords.length > 0 ? topicOverlap / keyWords.length : 0;
-  const consistencyScore = consistencyRatio >= 0.5 ? 25 : consistencyRatio > 0 ? 15 : 5;
+  const consistencyScore = keyWords.length === 0 ? 0 : consistencyRatio >= 0.5 ? 25 : consistencyRatio > 0 ? 15 : 0;
   factors.push(
     makeFactor(
       "Topic Consistency",
       consistencyScore,
       25,
-      `${topicOverlap}/${keyWords.length} title keywords align with content topics`
+      `${topicOverlap}/${keyWords.length} title keywords align with content topics`,
+      keyWords.length === 0 ? "neutral" : void 0
     )
   );
   const wordCount = countWords(text);
@@ -1171,11 +1614,11 @@ function auditEntityClarity(page) {
 }
 // src/modules/audits/categories/grounding-signals.ts
-function auditGroundingSignals(page) {
+function auditGroundingSignals(page, preExtracted) {
   const $ = page.$;
   const text = page.cleanText;
   const factors = [];
-  const { numberCount = 0 } = extractEntities(text);
+  const { numberCount = 0 } = preExtracted ?? extractEntities(text);
   const externalLinks = page.externalLinks;
   const extScore = thresholdScore(externalLinks.length, [
     [6, 13],
@@ -1345,15 +1788,16 @@ function auditReadabilityForCompression(page) {
 // src/modules/audits/service.ts
 function runAudits(page, fetchResult, domainSignals) {
+  const entities = extractEntities(page.cleanText);
   const extractability = auditContentExtractability(
     page,
     fetchResult,
     domainSignals
   );
   const structure = auditContentStructure(page);
-  const answerability = auditAnswerability(page);
-  const entityClarity = auditEntityClarity(page);
-  const groundingSignals = auditGroundingSignals(page);
+  const answerability = auditAnswerability(page, entities);
+  const entityClarity = auditEntityClarity(page, entities);
+  const groundingSignals = auditGroundingSignals(page, entities);
   const authorityContext = auditAuthorityContext(page);
   const readability = auditReadabilityForCompression(page);
   return {