npm - aiseo-audit - Versions diffs - 1.2.7 → 1.3.0 - Mend

aiseo-audit 1.2.7 → 1.3.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-7EB6D7.svg)](https://opensource.org/licenses/MIT)
 [![Node.js](https://img.shields.io/badge/node-%3E%3D20-7EB6D7.svg)](https://nodejs.org)
 [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-7EB6D7?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
-[![Tests](https://img.shields.io/badge/tests-194%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
+[![Tests](https://img.shields.io/badge/tests-238%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
 [![Coverage](https://img.shields.io/codecov/c/github/agencyenterprise/aiseo-audit?color=8FBC8F&label=coverage)](https://codecov.io/gh/agencyenterprise/aiseo-audit)
 <div align="center">
@@ -15,6 +15,9 @@
 Deterministic CLI that audits web pages for **AI search readiness**. Think Lighthouse, but for how well AI engines can fetch, extract, understand, and cite your content.
+> [!TIP]
+> Run `aiseo-audit https://www.aiseo-audit.com` to see a 100/100 [A+ Score](https://www.aiseo-audit.com/).
 **AI SEO measures how reusable your content is for generative engines, not traditional search rankings.**
 - [Quick Start](#quick-start)

package/dist/cli.js CHANGED Viewed

@@ -27,7 +27,7 @@ var import_commander = require("commander");
 // src/modules/analyzer/constants.ts
 var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
-var VERSION = true ? "1.2.7" : "0.0.0";
+var VERSION = true ? "1.3.0" : "0.0.0";
 // src/modules/fetcher/constants.ts
 var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
@@ -366,7 +366,16 @@ function extractEntities(text) {
     0,
     15
   );
-  return { people, organizations, places, topics };
+  const imperativeVerbCount = doc.verbs().isImperative().length;
+  const numberCount = doc.numbers().length;
+  return {
+    people,
+    organizations,
+    places,
+    topics,
+    imperativeVerbCount,
+    numberCount
+  };
 }
 function computeFleschReadingEase(text) {
   const words = text.split(/\s+/).filter((w) => w.length > 0);
@@ -435,6 +444,7 @@ function auditAnswerability(page) {
   const text = page.cleanText;
   const $ = page.$;
   const factors = [];
+  const { imperativeVerbCount = 0 } = extractEntities(text);
   const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
   const defScore = thresholdScore(defCount, [
     [6, 10],
@@ -479,7 +489,7 @@ function auditAnswerability(page) {
   );
   const stepCount = countPatternMatches(text, STEP_PATTERNS);
   const hasOl = $("ol").length > 0;
-  const stepTotal = stepCount + (hasOl ? 2 : 0);
+  const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
   const stepScore = thresholdScore(stepTotal, [
     [5, 10],
     [2, 7],
@@ -491,7 +501,7 @@ function auditAnswerability(page) {
       "Step-by-Step Content",
       stepScore,
       10,
-      `${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
+      `${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
     )
   );
   const questionMatches = text.match(/[^.!?]*\?/g) || [];
@@ -790,51 +800,99 @@ function auditAuthorityContext(page) {
 }
 // src/modules/audits/support/robots.ts
+function parseRobotGroups(robotsTxt) {
+  const groups = [];
+  let current = null;
+  for (const raw of robotsTxt.split("\n")) {
+    const line = raw.split("#")[0].trim();
+    if (!line) {
+      current = null;
+      continue;
+    }
+    const colonAt = line.indexOf(":");
+    if (colonAt === -1) continue;
+    const field = line.slice(0, colonAt).trim().toLowerCase();
+    const value = line.slice(colonAt + 1).trim();
+    if (field === "user-agent") {
+      if (!current) {
+        current = { agents: [], rules: [] };
+        groups.push(current);
+      }
+      current.agents.push(value.toLowerCase());
+    } else if (field === "disallow" || field === "allow") {
+      if (current) {
+        current.rules.push({ type: field, path: value });
+      }
+    }
+  }
+  return groups;
+}
+function matchingRulesForCrawler(groups, crawlerLower) {
+  const specific = [];
+  const wildcard = [];
+  for (const group of groups) {
+    if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
+    else if (group.agents.includes("*")) wildcard.push(...group.rules);
+  }
+  return { specific, wildcard };
+}
+function resolvesPathAsBlocked(rules, path) {
+  let bestMatchLength = -1;
+  let bestMatchIsDisallow = false;
+  for (const rule of rules) {
+    const rulePath = rule.path;
+    if (!rulePath || !path.startsWith(rulePath)) continue;
+    if (rulePath.length > bestMatchLength) {
+      bestMatchLength = rulePath.length;
+      bestMatchIsDisallow = rule.type === "disallow";
+    } else if (rulePath.length === bestMatchLength && rule.type === "allow") {
+      bestMatchIsDisallow = false;
+    }
+  }
+  return bestMatchLength >= 0 && bestMatchIsDisallow;
+}
+function findPartialBlocks(rules) {
+  return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
+}
 function checkCrawlerAccess(robotsTxt) {
   if (!robotsTxt)
     return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
-  const lines = robotsTxt.split("\n").map((l) => l.trim());
+  const groups = parseRobotGroups(robotsTxt);
   const allowed = [];
   const blocked = [];
   const unknown = [];
+  const partiallyBlocked = [];
   for (const crawler of AI_CRAWLERS) {
     const crawlerLower = crawler.toLowerCase();
-    let currentAgent = "";
-    let isBlocked = false;
-    let found = false;
-    for (const line of lines) {
-      const lower = line.toLowerCase();
-      if (lower.startsWith("user-agent:")) {
-        currentAgent = lower.split(":")[1]?.trim() || "";
-      } else if (currentAgent === crawlerLower || currentAgent === "*") {
-        if (lower.startsWith("disallow:")) {
-          const path = lower.split(":")[1]?.trim();
-          if (path === "/") {
-            if (currentAgent === crawlerLower) {
-              isBlocked = true;
-              found = true;
-            } else if (currentAgent === "*" && !found) {
-              isBlocked = true;
-            }
-          }
-        } else if (lower.startsWith("allow:")) {
-          if (currentAgent === crawlerLower) {
-            found = true;
-            isBlocked = false;
-          }
-        }
-      }
+    const { specific, wildcard } = matchingRulesForCrawler(
+      groups,
+      crawlerLower
+    );
+    const applicableRules = specific.length > 0 ? specific : wildcard;
+    if (applicableRules.length === 0) {
+      unknown.push(crawler);
+      continue;
     }
-    if (found) {
-      if (isBlocked) blocked.push(crawler);
-      else allowed.push(crawler);
-    } else if (isBlocked) {
+    const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
+    if (isSiteBlocked) {
       blocked.push(crawler);
     } else {
-      unknown.push(crawler);
+      allowed.push(crawler);
+      const pathBlocks = findPartialBlocks(applicableRules);
+      for (const path of pathBlocks) {
+        const entry = `${crawler}: ${path}`;
+        if (!partiallyBlocked.includes(entry)) {
+          partiallyBlocked.push(entry);
+        }
+      }
     }
   }
-  return { allowed, blocked, unknown };
+  return {
+    allowed,
+    blocked,
+    unknown,
+    ...partiallyBlocked.length > 0 && { partiallyBlocked }
+  };
 }
 // src/modules/audits/categories/content-extractability.ts
@@ -851,7 +909,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
-  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio >= 0.01 ? 8 : extractRatio > 0.15 ? 10 : 2;
+  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
   factors.push(
     makeFactor(
       "Text Extraction Quality",
@@ -876,7 +934,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const wc = page.stats.wordCount;
-  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc >= 100 ? 8 : wc > 3e3 ? 10 : 2;
+  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
   factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
   if (domainSignals) {
     const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
@@ -891,6 +949,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
       )
     );
     rawData.crawlerAccess = access2;
+    rawData.llmsTxt = {
+      llmsTxtExists: domainSignals.llmsTxtExists,
+      llmsFullTxtExists: domainSignals.llmsFullTxtExists
+    };
     const hasLlms = domainSignals.llmsTxtExists;
     const hasLlmsFull = domainSignals.llmsFullTxtExists;
     const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
@@ -1110,6 +1172,7 @@ function auditGroundingSignals(page) {
   const $ = page.$;
   const text = page.cleanText;
   const factors = [];
+  const { numberCount = 0 } = extractEntities(text);
   const externalLinks = page.externalLinks;
   const extScore = thresholdScore(externalLinks.length, [
     [6, 13],
@@ -1143,7 +1206,8 @@ function auditGroundingSignals(page) {
     )
   );
   const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
-  const numScore = thresholdScore(numericCount, [
+  const totalNumericSignals = numericCount + numberCount;
+  const numScore = thresholdScore(totalNumericSignals, [
     [9, 13],
     [4, 9],
     [1, 5],
@@ -1154,7 +1218,7 @@ function auditGroundingSignals(page) {
       "Numeric Claims",
       numScore,
       13,
-      `${numericCount} statistical references`
+      `${numericCount} statistical references, ${numberCount} numeric values`
     )
   );
   const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
@@ -1359,7 +1423,11 @@ function removeBoilerplate($) {
 function normalizeWhitespace(text) {
   return text.replace(/\s+/g, " ").trim();
 }
+var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
 function extractCleanText($) {
+  $(BLOCK_ELEMENTS).each((_, el) => {
+    $(el).append(" ");
+  });
   return normalizeWhitespace($("body").text());
 }
@@ -1368,7 +1436,9 @@ function extractPage(html, url) {
   const $ = cheerio.load(html);
   const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
   const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
-  const rawText = $("body").text().replace(/\s+/g, " ").trim();
+  const $raw = cheerio.load(html);
+  $raw("script, style, noscript").remove();
+  const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
   const rawByteLength = Buffer.byteLength(html, "utf-8");
   const h1Count = $("h1").length;
   const h2Count = $("h2").length;
@@ -1380,9 +1450,22 @@ function extractPage(html, url) {
   const listItemCount = $("li").length;
   const tableCount = $("table").length;
   const paragraphCount = $("p").length;
+  const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
+    "image",
+    "photo",
+    "logo",
+    "icon",
+    "picture",
+    "img",
+    "graphic",
+    "thumbnail"
+  ]);
   let imagesWithAlt = 0;
   $("img").each((_, el) => {
-    if ($(el).attr("alt")) imagesWithAlt++;
+    const alt = $(el).attr("alt")?.trim() ?? "";
+    const words = alt.split(/\s+/).filter((w) => w.length > 0);
+    const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
+    if (isMeaningful) imagesWithAlt++;
   });
   const pageDomain = getDomain(url);
   const externalLinks = [];
@@ -1516,9 +1599,19 @@ var RECOMMENDATION_BUILDERS = {
     }
     return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
   },
-  "LLMs.txt Presence": static_(
-    "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site, helping them understand and reference your content more effectively."
-  ),
+  "LLMs.txt Presence": (rawData) => {
+    const llms = rawData.llmsTxt;
+    if (!llms) {
+      return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+    }
+    if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
+      return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
+    }
+    if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
+      return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
+    }
+    return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+  },
   "Image Accessibility": (rawData) => {
     const images = rawData.imageAccessibility;
     if (!images || images.imageCount === 0) {

package/dist/cli.mjs CHANGED Viewed

@@ -3,7 +3,7 @@ import { Command } from "commander";
 // src/modules/analyzer/constants.ts
 var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
-var VERSION = true ? "1.2.7" : "0.0.0";
+var VERSION = true ? "1.3.0" : "0.0.0";
 // src/modules/fetcher/constants.ts
 var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
@@ -342,7 +342,16 @@ function extractEntities(text) {
     0,
     15
   );
-  return { people, organizations, places, topics };
+  const imperativeVerbCount = doc.verbs().isImperative().length;
+  const numberCount = doc.numbers().length;
+  return {
+    people,
+    organizations,
+    places,
+    topics,
+    imperativeVerbCount,
+    numberCount
+  };
 }
 function computeFleschReadingEase(text) {
   const words = text.split(/\s+/).filter((w) => w.length > 0);
@@ -411,6 +420,7 @@ function auditAnswerability(page) {
   const text = page.cleanText;
   const $ = page.$;
   const factors = [];
+  const { imperativeVerbCount = 0 } = extractEntities(text);
   const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
   const defScore = thresholdScore(defCount, [
     [6, 10],
@@ -455,7 +465,7 @@ function auditAnswerability(page) {
   );
   const stepCount = countPatternMatches(text, STEP_PATTERNS);
   const hasOl = $("ol").length > 0;
-  const stepTotal = stepCount + (hasOl ? 2 : 0);
+  const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
   const stepScore = thresholdScore(stepTotal, [
     [5, 10],
     [2, 7],
@@ -467,7 +477,7 @@ function auditAnswerability(page) {
       "Step-by-Step Content",
       stepScore,
       10,
-      `${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
+      `${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
     )
   );
   const questionMatches = text.match(/[^.!?]*\?/g) || [];
@@ -766,51 +776,99 @@ function auditAuthorityContext(page) {
 }
 // src/modules/audits/support/robots.ts
+function parseRobotGroups(robotsTxt) {
+  const groups = [];
+  let current = null;
+  for (const raw of robotsTxt.split("\n")) {
+    const line = raw.split("#")[0].trim();
+    if (!line) {
+      current = null;
+      continue;
+    }
+    const colonAt = line.indexOf(":");
+    if (colonAt === -1) continue;
+    const field = line.slice(0, colonAt).trim().toLowerCase();
+    const value = line.slice(colonAt + 1).trim();
+    if (field === "user-agent") {
+      if (!current) {
+        current = { agents: [], rules: [] };
+        groups.push(current);
+      }
+      current.agents.push(value.toLowerCase());
+    } else if (field === "disallow" || field === "allow") {
+      if (current) {
+        current.rules.push({ type: field, path: value });
+      }
+    }
+  }
+  return groups;
+}
+function matchingRulesForCrawler(groups, crawlerLower) {
+  const specific = [];
+  const wildcard = [];
+  for (const group of groups) {
+    if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
+    else if (group.agents.includes("*")) wildcard.push(...group.rules);
+  }
+  return { specific, wildcard };
+}
+function resolvesPathAsBlocked(rules, path) {
+  let bestMatchLength = -1;
+  let bestMatchIsDisallow = false;
+  for (const rule of rules) {
+    const rulePath = rule.path;
+    if (!rulePath || !path.startsWith(rulePath)) continue;
+    if (rulePath.length > bestMatchLength) {
+      bestMatchLength = rulePath.length;
+      bestMatchIsDisallow = rule.type === "disallow";
+    } else if (rulePath.length === bestMatchLength && rule.type === "allow") {
+      bestMatchIsDisallow = false;
+    }
+  }
+  return bestMatchLength >= 0 && bestMatchIsDisallow;
+}
+function findPartialBlocks(rules) {
+  return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
+}
 function checkCrawlerAccess(robotsTxt) {
   if (!robotsTxt)
     return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
-  const lines = robotsTxt.split("\n").map((l) => l.trim());
+  const groups = parseRobotGroups(robotsTxt);
   const allowed = [];
   const blocked = [];
   const unknown = [];
+  const partiallyBlocked = [];
   for (const crawler of AI_CRAWLERS) {
     const crawlerLower = crawler.toLowerCase();
-    let currentAgent = "";
-    let isBlocked = false;
-    let found = false;
-    for (const line of lines) {
-      const lower = line.toLowerCase();
-      if (lower.startsWith("user-agent:")) {
-        currentAgent = lower.split(":")[1]?.trim() || "";
-      } else if (currentAgent === crawlerLower || currentAgent === "*") {
-        if (lower.startsWith("disallow:")) {
-          const path = lower.split(":")[1]?.trim();
-          if (path === "/") {
-            if (currentAgent === crawlerLower) {
-              isBlocked = true;
-              found = true;
-            } else if (currentAgent === "*" && !found) {
-              isBlocked = true;
-            }
-          }
-        } else if (lower.startsWith("allow:")) {
-          if (currentAgent === crawlerLower) {
-            found = true;
-            isBlocked = false;
-          }
-        }
-      }
+    const { specific, wildcard } = matchingRulesForCrawler(
+      groups,
+      crawlerLower
+    );
+    const applicableRules = specific.length > 0 ? specific : wildcard;
+    if (applicableRules.length === 0) {
+      unknown.push(crawler);
+      continue;
     }
-    if (found) {
-      if (isBlocked) blocked.push(crawler);
-      else allowed.push(crawler);
-    } else if (isBlocked) {
+    const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
+    if (isSiteBlocked) {
       blocked.push(crawler);
     } else {
-      unknown.push(crawler);
+      allowed.push(crawler);
+      const pathBlocks = findPartialBlocks(applicableRules);
+      for (const path of pathBlocks) {
+        const entry = `${crawler}: ${path}`;
+        if (!partiallyBlocked.includes(entry)) {
+          partiallyBlocked.push(entry);
+        }
+      }
     }
   }
-  return { allowed, blocked, unknown };
+  return {
+    allowed,
+    blocked,
+    unknown,
+    ...partiallyBlocked.length > 0 && { partiallyBlocked }
+  };
 }
 // src/modules/audits/categories/content-extractability.ts
@@ -827,7 +885,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
-  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio >= 0.01 ? 8 : extractRatio > 0.15 ? 10 : 2;
+  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
   factors.push(
     makeFactor(
       "Text Extraction Quality",
@@ -852,7 +910,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const wc = page.stats.wordCount;
-  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc >= 100 ? 8 : wc > 3e3 ? 10 : 2;
+  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
   factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
   if (domainSignals) {
     const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
@@ -867,6 +925,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
       )
     );
     rawData.crawlerAccess = access2;
+    rawData.llmsTxt = {
+      llmsTxtExists: domainSignals.llmsTxtExists,
+      llmsFullTxtExists: domainSignals.llmsFullTxtExists
+    };
     const hasLlms = domainSignals.llmsTxtExists;
     const hasLlmsFull = domainSignals.llmsFullTxtExists;
     const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
@@ -1086,6 +1148,7 @@ function auditGroundingSignals(page) {
   const $ = page.$;
   const text = page.cleanText;
   const factors = [];
+  const { numberCount = 0 } = extractEntities(text);
   const externalLinks = page.externalLinks;
   const extScore = thresholdScore(externalLinks.length, [
     [6, 13],
@@ -1119,7 +1182,8 @@ function auditGroundingSignals(page) {
     )
   );
   const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
-  const numScore = thresholdScore(numericCount, [
+  const totalNumericSignals = numericCount + numberCount;
+  const numScore = thresholdScore(totalNumericSignals, [
     [9, 13],
     [4, 9],
     [1, 5],
@@ -1130,7 +1194,7 @@ function auditGroundingSignals(page) {
       "Numeric Claims",
       numScore,
       13,
-      `${numericCount} statistical references`
+      `${numericCount} statistical references, ${numberCount} numeric values`
     )
   );
   const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
@@ -1335,7 +1399,11 @@ function removeBoilerplate($) {
 function normalizeWhitespace(text) {
   return text.replace(/\s+/g, " ").trim();
 }
+var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
 function extractCleanText($) {
+  $(BLOCK_ELEMENTS).each((_, el) => {
+    $(el).append(" ");
+  });
   return normalizeWhitespace($("body").text());
 }
@@ -1344,7 +1412,9 @@ function extractPage(html, url) {
   const $ = cheerio.load(html);
   const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
   const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
-  const rawText = $("body").text().replace(/\s+/g, " ").trim();
+  const $raw = cheerio.load(html);
+  $raw("script, style, noscript").remove();
+  const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
   const rawByteLength = Buffer.byteLength(html, "utf-8");
   const h1Count = $("h1").length;
   const h2Count = $("h2").length;
@@ -1356,9 +1426,22 @@ function extractPage(html, url) {
   const listItemCount = $("li").length;
   const tableCount = $("table").length;
   const paragraphCount = $("p").length;
+  const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
+    "image",
+    "photo",
+    "logo",
+    "icon",
+    "picture",
+    "img",
+    "graphic",
+    "thumbnail"
+  ]);
   let imagesWithAlt = 0;
   $("img").each((_, el) => {
-    if ($(el).attr("alt")) imagesWithAlt++;
+    const alt = $(el).attr("alt")?.trim() ?? "";
+    const words = alt.split(/\s+/).filter((w) => w.length > 0);
+    const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
+    if (isMeaningful) imagesWithAlt++;
   });
   const pageDomain = getDomain(url);
   const externalLinks = [];
@@ -1492,9 +1575,19 @@ var RECOMMENDATION_BUILDERS = {
     }
     return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
   },
-  "LLMs.txt Presence": static_(
-    "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site, helping them understand and reference your content more effectively."
-  ),
+  "LLMs.txt Presence": (rawData) => {
+    const llms = rawData.llmsTxt;
+    if (!llms) {
+      return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+    }
+    if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
+      return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
+    }
+    if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
+      return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
+    }
+    return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+  },
   "Image Accessibility": (rawData) => {
     const images = rawData.imageAccessibility;
     if (!images || images.imageCount === 0) {

package/dist/index.d.mts CHANGED Viewed

@@ -90,6 +90,11 @@ declare const AnalyzerResultSchema: z.ZodObject<{
             allowed: z.ZodArray<z.ZodString>;
             blocked: z.ZodArray<z.ZodString>;
             unknown: z.ZodArray<z.ZodString>;
+            partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        }, z.core.$strip>>;
+        llmsTxt: z.ZodOptional<z.ZodObject<{
+            llmsTxtExists: z.ZodBoolean;
+            llmsFullTxtExists: z.ZodBoolean;
         }, z.core.$strip>>;
         sectionLengths: z.ZodOptional<z.ZodObject<{
             sectionCount: z.ZodNumber;
@@ -105,6 +110,8 @@ declare const AnalyzerResultSchema: z.ZodObject<{
             organizations: z.ZodArray<z.ZodString>;
             places: z.ZodArray<z.ZodString>;
             topics: z.ZodArray<z.ZodString>;
+            imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
+            numberCount: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
             url: z.ZodString;
@@ -266,6 +273,11 @@ declare const AuditResultSchema: z.ZodObject<{
             allowed: z.ZodArray<z.ZodString>;
             blocked: z.ZodArray<z.ZodString>;
             unknown: z.ZodArray<z.ZodString>;
+            partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        }, z.core.$strip>>;
+        llmsTxt: z.ZodOptional<z.ZodObject<{
+            llmsTxtExists: z.ZodBoolean;
+            llmsFullTxtExists: z.ZodBoolean;
         }, z.core.$strip>>;
         sectionLengths: z.ZodOptional<z.ZodObject<{
             sectionCount: z.ZodNumber;
@@ -281,6 +293,8 @@ declare const AuditResultSchema: z.ZodObject<{
             organizations: z.ZodArray<z.ZodString>;
             places: z.ZodArray<z.ZodString>;
             topics: z.ZodArray<z.ZodString>;
+            imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
+            numberCount: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
             url: z.ZodString;

package/dist/index.d.ts CHANGED Viewed

@@ -90,6 +90,11 @@ declare const AnalyzerResultSchema: z.ZodObject<{
             allowed: z.ZodArray<z.ZodString>;
             blocked: z.ZodArray<z.ZodString>;
             unknown: z.ZodArray<z.ZodString>;
+            partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        }, z.core.$strip>>;
+        llmsTxt: z.ZodOptional<z.ZodObject<{
+            llmsTxtExists: z.ZodBoolean;
+            llmsFullTxtExists: z.ZodBoolean;
         }, z.core.$strip>>;
         sectionLengths: z.ZodOptional<z.ZodObject<{
             sectionCount: z.ZodNumber;
@@ -105,6 +110,8 @@ declare const AnalyzerResultSchema: z.ZodObject<{
             organizations: z.ZodArray<z.ZodString>;
             places: z.ZodArray<z.ZodString>;
             topics: z.ZodArray<z.ZodString>;
+            imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
+            numberCount: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
             url: z.ZodString;
@@ -266,6 +273,11 @@ declare const AuditResultSchema: z.ZodObject<{
             allowed: z.ZodArray<z.ZodString>;
             blocked: z.ZodArray<z.ZodString>;
             unknown: z.ZodArray<z.ZodString>;
+            partiallyBlocked: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        }, z.core.$strip>>;
+        llmsTxt: z.ZodOptional<z.ZodObject<{
+            llmsTxtExists: z.ZodBoolean;
+            llmsFullTxtExists: z.ZodBoolean;
         }, z.core.$strip>>;
         sectionLengths: z.ZodOptional<z.ZodObject<{
             sectionCount: z.ZodNumber;
@@ -281,6 +293,8 @@ declare const AuditResultSchema: z.ZodObject<{
             organizations: z.ZodArray<z.ZodString>;
             places: z.ZodArray<z.ZodString>;
             topics: z.ZodArray<z.ZodString>;
+            imperativeVerbCount: z.ZodOptional<z.ZodNumber>;
+            numberCount: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         externalLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
             url: z.ZodString;

package/dist/index.js CHANGED Viewed

@@ -364,7 +364,16 @@ function extractEntities(text) {
     0,
     15
   );
-  return { people, organizations, places, topics };
+  const imperativeVerbCount = doc.verbs().isImperative().length;
+  const numberCount = doc.numbers().length;
+  return {
+    people,
+    organizations,
+    places,
+    topics,
+    imperativeVerbCount,
+    numberCount
+  };
 }
 function computeFleschReadingEase(text) {
   const words = text.split(/\s+/).filter((w) => w.length > 0);
@@ -433,6 +442,7 @@ function auditAnswerability(page) {
   const text = page.cleanText;
   const $ = page.$;
   const factors = [];
+  const { imperativeVerbCount = 0 } = extractEntities(text);
   const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
   const defScore = thresholdScore(defCount, [
     [6, 10],
@@ -477,7 +487,7 @@ function auditAnswerability(page) {
   );
   const stepCount = countPatternMatches(text, STEP_PATTERNS);
   const hasOl = $("ol").length > 0;
-  const stepTotal = stepCount + (hasOl ? 2 : 0);
+  const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
   const stepScore = thresholdScore(stepTotal, [
     [5, 10],
     [2, 7],
@@ -489,7 +499,7 @@ function auditAnswerability(page) {
       "Step-by-Step Content",
       stepScore,
       10,
-      `${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
+      `${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
     )
   );
   const questionMatches = text.match(/[^.!?]*\?/g) || [];
@@ -788,51 +798,99 @@ function auditAuthorityContext(page) {
 }
 // src/modules/audits/support/robots.ts
+function parseRobotGroups(robotsTxt) {
+  const groups = [];
+  let current = null;
+  for (const raw of robotsTxt.split("\n")) {
+    const line = raw.split("#")[0].trim();
+    if (!line) {
+      current = null;
+      continue;
+    }
+    const colonAt = line.indexOf(":");
+    if (colonAt === -1) continue;
+    const field = line.slice(0, colonAt).trim().toLowerCase();
+    const value = line.slice(colonAt + 1).trim();
+    if (field === "user-agent") {
+      if (!current) {
+        current = { agents: [], rules: [] };
+        groups.push(current);
+      }
+      current.agents.push(value.toLowerCase());
+    } else if (field === "disallow" || field === "allow") {
+      if (current) {
+        current.rules.push({ type: field, path: value });
+      }
+    }
+  }
+  return groups;
+}
 /**
  * Collect the rules that apply to one crawler.
  *
  * @param {{agents: string[], rules: object[]}[]} groups - Parsed groups.
  * @param {string} crawlerLower - Crawler name, already lower-cased.
  * @returns {{specific: object[], wildcard: object[]}} `specific` holds the
  *   rules of every group that names the crawler; `wildcard` holds the rules
  *   of groups that name "*" but not the crawler.
  */
+function matchingRulesForCrawler(groups, crawlerLower) {
+  const specific = [];
+  const wildcard = [];
+  for (const group of groups) {
   // A group naming both the crawler and "*" counts as specific only.
+    if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
+    else if (group.agents.includes("*")) wildcard.push(...group.rules);
+  }
+  return { specific, wildcard };
+}
 /**
  * Decide whether `path` is disallowed by the given rules.
  *
  * The rule with the longest path that is a prefix of `path` wins; when an
  * allow and a disallow rule have the same length, allow wins. Rules with an
  * empty path are skipped. Matching is a literal `startsWith` test, so `*`
  * and `$` inside a rule path get no special treatment here.
  *
  * @param {{type: string, path: string}[]} rules - Rules to evaluate.
  * @param {string} path - URL path to test.
  * @returns {boolean} true only when a rule matched and the winner is a disallow.
  */
+function resolvesPathAsBlocked(rules, path) {
+  let bestMatchLength = -1;
+  let bestMatchIsDisallow = false;
+  for (const rule of rules) {
+    const rulePath = rule.path;
+    if (!rulePath || !path.startsWith(rulePath)) continue;
+    if (rulePath.length > bestMatchLength) {
+      bestMatchLength = rulePath.length;
+      bestMatchIsDisallow = rule.type === "disallow";
+    } else if (rulePath.length === bestMatchLength && rule.type === "allow") {
   // Equal-length tie: the allow rule overrides an earlier disallow.
+      bestMatchIsDisallow = false;
+    }
+  }
+  return bestMatchLength >= 0 && bestMatchIsDisallow;
+}
 /**
  * List the paths of disallow rules that target something other than the
  * whole site: the rule path must be non-empty and not exactly "/".
  * Allow rules are not consulted, so a listed path may still be re-allowed
  * by another rule.
  */
+function findPartialBlocks(rules) {
+  return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
+}
 /**
  * Classify every entry of AI_CRAWLERS against a robots.txt body.
  * (These notes describe the added "+" implementation.)
  *
  * - No robots.txt text: every crawler is reported as unknown.
  * - Rules from groups naming the crawler are used when there are any;
  *   otherwise the "*" groups' rules are used.
  * - No applicable rules: unknown. "/" resolves as blocked: blocked.
  *   Otherwise: allowed, and each non-root disallow path is recorded as
  *   "<crawler>: <path>".
  *
  * @param {string} robotsTxt - robots.txt contents; falsy when unavailable.
  * @returns {{allowed: string[], blocked: string[], unknown: string[], partiallyBlocked?: string[]}}
  *   `partiallyBlocked` is present only when at least one entry was recorded.
  */
 function checkCrawlerAccess(robotsTxt) {
   if (!robotsTxt)
     return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
-  const lines = robotsTxt.split("\n").map((l) => l.trim());
+  const groups = parseRobotGroups(robotsTxt);
   const allowed = [];
   const blocked = [];
   const unknown = [];
+  const partiallyBlocked = [];
   for (const crawler of AI_CRAWLERS) {
     const crawlerLower = crawler.toLowerCase();
-    let currentAgent = "";
-    let isBlocked = false;
-    let found = false;
-    for (const line of lines) {
-      const lower = line.toLowerCase();
-      if (lower.startsWith("user-agent:")) {
-        currentAgent = lower.split(":")[1]?.trim() || "";
-      } else if (currentAgent === crawlerLower || currentAgent === "*") {
-        if (lower.startsWith("disallow:")) {
-          const path = lower.split(":")[1]?.trim();
-          if (path === "/") {
-            if (currentAgent === crawlerLower) {
-              isBlocked = true;
-              found = true;
-            } else if (currentAgent === "*" && !found) {
-              isBlocked = true;
-            }
-          }
-        } else if (lower.startsWith("allow:")) {
-          if (currentAgent === crawlerLower) {
-            found = true;
-            isBlocked = false;
-          }
-        }
-      }
+    const { specific, wildcard } = matchingRulesForCrawler(
+      groups,
+      crawlerLower
+    );
     // Crawler-specific rules replace the wildcard rules; they are never merged.
+    const applicableRules = specific.length > 0 ? specific : wildcard;
+    if (applicableRules.length === 0) {
+      unknown.push(crawler);
+      continue;
     }
-    if (found) {
-      if (isBlocked) blocked.push(crawler);
-      else allowed.push(crawler);
-    } else if (isBlocked) {
     // "Blocked" means the site root itself resolves to a disallow.
+    const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
+    if (isSiteBlocked) {
       blocked.push(crawler);
     } else {
-      unknown.push(crawler);
+      allowed.push(crawler);
+      const pathBlocks = findPartialBlocks(applicableRules);
+      for (const path of pathBlocks) {
+        const entry = `${crawler}: ${path}`;
       // Skip entries already recorded (e.g. the same path listed twice).
+        if (!partiallyBlocked.includes(entry)) {
+          partiallyBlocked.push(entry);
+        }
+      }
     }
   }
-  return { allowed, blocked, unknown };
+  return {
+    allowed,
+    blocked,
+    unknown,
   // Spreading `false` adds nothing, so the key is omitted when the list is empty.
+    ...partiallyBlocked.length > 0 && { partiallyBlocked }
+  };
 }
 // src/modules/audits/categories/content-extractability.ts
@@ -849,7 +907,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
-  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio >= 0.01 ? 8 : extractRatio > 0.15 ? 10 : 2;
+  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
   factors.push(
     makeFactor(
       "Text Extraction Quality",
@@ -874,7 +932,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const wc = page.stats.wordCount;
-  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc >= 100 ? 8 : wc > 3e3 ? 10 : 2;
+  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
   factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
   if (domainSignals) {
     const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
@@ -889,6 +947,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
       )
     );
     rawData.crawlerAccess = access2;
+    rawData.llmsTxt = {
+      llmsTxtExists: domainSignals.llmsTxtExists,
+      llmsFullTxtExists: domainSignals.llmsFullTxtExists
+    };
     const hasLlms = domainSignals.llmsTxtExists;
     const hasLlmsFull = domainSignals.llmsFullTxtExists;
     const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
@@ -1108,6 +1170,7 @@ function auditGroundingSignals(page) {
   const $ = page.$;
   const text = page.cleanText;
   const factors = [];
+  const { numberCount = 0 } = extractEntities(text);
   const externalLinks = page.externalLinks;
   const extScore = thresholdScore(externalLinks.length, [
     [6, 13],
@@ -1141,7 +1204,8 @@ function auditGroundingSignals(page) {
     )
   );
   const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
-  const numScore = thresholdScore(numericCount, [
+  const totalNumericSignals = numericCount + numberCount;
+  const numScore = thresholdScore(totalNumericSignals, [
     [9, 13],
     [4, 9],
     [1, 5],
@@ -1152,7 +1216,7 @@ function auditGroundingSignals(page) {
       "Numeric Claims",
       numScore,
       13,
-      `${numericCount} statistical references`
+      `${numericCount} statistical references, ${numberCount} numeric values`
     )
   );
   const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
@@ -1357,7 +1421,11 @@ function removeBoilerplate($) {
 /**
  * Collapse each run of whitespace in `text` to a single space and strip
  * leading and trailing whitespace.
  *
  * @param {string} text - Text to normalize.
  * @returns {string} The normalized text.
  */
 function normalizeWhitespace(text) {
   const collapsed = text.replace(/\s+/g, " ");
   return collapsed.trim();
 }
 // Selector list of elements that get a trailing space before text extraction.
+var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
 /**
  * Return the whitespace-normalized text of <body>.
  *
  * Each element matched by BLOCK_ELEMENTS first has a space appended,
  * presumably so text from adjacent blocks is not glued together.
  * NOTE(review): the append changes the document that `$` wraps; confirm no
  * caller needs that document unmodified afterwards.
  *
  * @param $ - Document query function (cheerio-style; built with cheerio.load elsewhere in this file).
  * @returns {string} Normalized body text.
  */
 function extractCleanText($) {
+  $(BLOCK_ELEMENTS).each((_, el) => {
+    $(el).append(" ");
+  });
   return normalizeWhitespace($("body").text());
 }
@@ -1366,7 +1434,9 @@ function extractPage(html, url) {
   const $ = cheerio.load(html);
   const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
   const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
-  const rawText = $("body").text().replace(/\s+/g, " ").trim();
+  const $raw = cheerio.load(html);
+  $raw("script, style, noscript").remove();
+  const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
   const rawByteLength = Buffer.byteLength(html, "utf-8");
   const h1Count = $("h1").length;
   const h2Count = $("h2").length;
@@ -1378,9 +1448,22 @@ function extractPage(html, url) {
   const listItemCount = $("li").length;
   const tableCount = $("table").length;
   const paragraphCount = $("p").length;
+  const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
+    "image",
+    "photo",
+    "logo",
+    "icon",
+    "picture",
+    "img",
+    "graphic",
+    "thumbnail"
+  ]);
   let imagesWithAlt = 0;
   $("img").each((_, el) => {
-    if ($(el).attr("alt")) imagesWithAlt++;
+    const alt = $(el).attr("alt")?.trim() ?? "";
+    const words = alt.split(/\s+/).filter((w) => w.length > 0);
+    const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
+    if (isMeaningful) imagesWithAlt++;
   });
   const pageDomain = getDomain(url);
   const externalLinks = [];
@@ -1440,7 +1523,7 @@ var import_zod = require("zod");
 // src/modules/analyzer/constants.ts
 var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
-var VERSION = true ? "1.2.7" : "0.0.0";
+var VERSION = true ? "1.3.0" : "0.0.0";
 // src/modules/fetcher/schema.ts
 var FetchOptionsSchema = import_zod.z.object({
@@ -1520,9 +1603,19 @@ var RECOMMENDATION_BUILDERS = {
     }
     return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
   },
-  "LLMs.txt Presence": static_(
-    "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site, helping them understand and reference your content more effectively."
-  ),
   // Builds the recommendation text for the "LLMs.txt Presence" factor from
   // `rawData.llmsTxt` ({ llmsTxtExists, llmsFullTxtExists }).
   // Exactly one file present: names the missing one. Otherwise: generic advice.
+  "LLMs.txt Presence": (rawData) => {
+    const llms = rawData.llmsTxt;
+    if (!llms) {
+      return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+    }
+    if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
+      return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
+    }
+    if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
+      return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
+    }
     // NOTE(review): reached when neither file exists and also when both exist;
     // confirm this builder is never invoked in the "both exist" case.
+    return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+  },
   "Image Accessibility": (rawData) => {
     const images = rawData.imageAccessibility;
     if (!images || images.imageCount === 0) {

package/dist/index.mjs CHANGED Viewed

@@ -326,7 +326,16 @@ function extractEntities(text) {
     0,
     15
   );
-  return { people, organizations, places, topics };
+  const imperativeVerbCount = doc.verbs().isImperative().length;
+  const numberCount = doc.numbers().length;
+  return {
+    people,
+    organizations,
+    places,
+    topics,
+    imperativeVerbCount,
+    numberCount
+  };
 }
 function computeFleschReadingEase(text) {
   const words = text.split(/\s+/).filter((w) => w.length > 0);
@@ -395,6 +404,7 @@ function auditAnswerability(page) {
   const text = page.cleanText;
   const $ = page.$;
   const factors = [];
+  const { imperativeVerbCount = 0 } = extractEntities(text);
   const defCount = countPatternMatches(text, DEFINITION_PATTERNS);
   const defScore = thresholdScore(defCount, [
     [6, 10],
@@ -439,7 +449,7 @@ function auditAnswerability(page) {
   );
   const stepCount = countPatternMatches(text, STEP_PATTERNS);
   const hasOl = $("ol").length > 0;
-  const stepTotal = stepCount + (hasOl ? 2 : 0);
+  const stepTotal = stepCount + imperativeVerbCount + (hasOl ? 2 : 0);
   const stepScore = thresholdScore(stepTotal, [
     [5, 10],
     [2, 7],
@@ -451,7 +461,7 @@ function auditAnswerability(page) {
       "Step-by-Step Content",
       stepScore,
       10,
-      `${stepCount} step indicators${hasOl ? ", ordered lists found" : ""}`
+      `${stepCount} step indicators, ${imperativeVerbCount} instruction verbs${hasOl ? ", ordered lists found" : ""}`
     )
   );
   const questionMatches = text.match(/[^.!?]*\?/g) || [];
@@ -750,51 +760,99 @@ function auditAuthorityContext(page) {
 }
 // src/modules/audits/support/robots.ts
+function parseRobotGroups(robotsTxt) {
+  const groups = [];
+  let current = null;
+  for (const raw of robotsTxt.split("\n")) {
+    const line = raw.split("#")[0].trim();
+    if (!line) {
+      current = null;
+      continue;
+    }
+    const colonAt = line.indexOf(":");
+    if (colonAt === -1) continue;
+    const field = line.slice(0, colonAt).trim().toLowerCase();
+    const value = line.slice(colonAt + 1).trim();
+    if (field === "user-agent") {
+      if (!current) {
+        current = { agents: [], rules: [] };
+        groups.push(current);
+      }
+      current.agents.push(value.toLowerCase());
+    } else if (field === "disallow" || field === "allow") {
+      if (current) {
+        current.rules.push({ type: field, path: value });
+      }
+    }
+  }
+  return groups;
+}
 /**
  * Collect the rules that apply to one crawler.
  *
  * @param {{agents: string[], rules: object[]}[]} groups - Parsed groups.
  * @param {string} crawlerLower - Crawler name, already lower-cased.
  * @returns {{specific: object[], wildcard: object[]}} `specific` holds the
  *   rules of every group that names the crawler; `wildcard` holds the rules
  *   of groups that name "*" but not the crawler.
  */
+function matchingRulesForCrawler(groups, crawlerLower) {
+  const specific = [];
+  const wildcard = [];
+  for (const group of groups) {
   // A group naming both the crawler and "*" counts as specific only.
+    if (group.agents.includes(crawlerLower)) specific.push(...group.rules);
+    else if (group.agents.includes("*")) wildcard.push(...group.rules);
+  }
+  return { specific, wildcard };
+}
 /**
  * Decide whether `path` is disallowed by the given rules.
  *
  * The rule with the longest path that is a prefix of `path` wins; when an
  * allow and a disallow rule have the same length, allow wins. Rules with an
  * empty path are skipped. Matching is a literal `startsWith` test, so `*`
  * and `$` inside a rule path get no special treatment here.
  *
  * @param {{type: string, path: string}[]} rules - Rules to evaluate.
  * @param {string} path - URL path to test.
  * @returns {boolean} true only when a rule matched and the winner is a disallow.
  */
+function resolvesPathAsBlocked(rules, path) {
+  let bestMatchLength = -1;
+  let bestMatchIsDisallow = false;
+  for (const rule of rules) {
+    const rulePath = rule.path;
+    if (!rulePath || !path.startsWith(rulePath)) continue;
+    if (rulePath.length > bestMatchLength) {
+      bestMatchLength = rulePath.length;
+      bestMatchIsDisallow = rule.type === "disallow";
+    } else if (rulePath.length === bestMatchLength && rule.type === "allow") {
   // Equal-length tie: the allow rule overrides an earlier disallow.
+      bestMatchIsDisallow = false;
+    }
+  }
+  return bestMatchLength >= 0 && bestMatchIsDisallow;
+}
 /**
  * List the paths of disallow rules that target something other than the
  * whole site: the rule path must be non-empty and not exactly "/".
  * Allow rules are not consulted, so a listed path may still be re-allowed
  * by another rule.
  */
+function findPartialBlocks(rules) {
+  return rules.filter((r) => r.type === "disallow" && r.path && r.path !== "/").map((r) => r.path);
+}
 /**
  * Classify every entry of AI_CRAWLERS against a robots.txt body.
  * (These notes describe the added "+" implementation.)
  *
  * - No robots.txt text: every crawler is reported as unknown.
  * - Rules from groups naming the crawler are used when there are any;
  *   otherwise the "*" groups' rules are used.
  * - No applicable rules: unknown. "/" resolves as blocked: blocked.
  *   Otherwise: allowed, and each non-root disallow path is recorded as
  *   "<crawler>: <path>".
  *
  * @param {string} robotsTxt - robots.txt contents; falsy when unavailable.
  * @returns {{allowed: string[], blocked: string[], unknown: string[], partiallyBlocked?: string[]}}
  *   `partiallyBlocked` is present only when at least one entry was recorded.
  */
 function checkCrawlerAccess(robotsTxt) {
   if (!robotsTxt)
     return { allowed: [], blocked: [], unknown: [...AI_CRAWLERS] };
-  const lines = robotsTxt.split("\n").map((l) => l.trim());
+  const groups = parseRobotGroups(robotsTxt);
   const allowed = [];
   const blocked = [];
   const unknown = [];
+  const partiallyBlocked = [];
   for (const crawler of AI_CRAWLERS) {
     const crawlerLower = crawler.toLowerCase();
-    let currentAgent = "";
-    let isBlocked = false;
-    let found = false;
-    for (const line of lines) {
-      const lower = line.toLowerCase();
-      if (lower.startsWith("user-agent:")) {
-        currentAgent = lower.split(":")[1]?.trim() || "";
-      } else if (currentAgent === crawlerLower || currentAgent === "*") {
-        if (lower.startsWith("disallow:")) {
-          const path = lower.split(":")[1]?.trim();
-          if (path === "/") {
-            if (currentAgent === crawlerLower) {
-              isBlocked = true;
-              found = true;
-            } else if (currentAgent === "*" && !found) {
-              isBlocked = true;
-            }
-          }
-        } else if (lower.startsWith("allow:")) {
-          if (currentAgent === crawlerLower) {
-            found = true;
-            isBlocked = false;
-          }
-        }
-      }
+    const { specific, wildcard } = matchingRulesForCrawler(
+      groups,
+      crawlerLower
+    );
     // Crawler-specific rules replace the wildcard rules; they are never merged.
+    const applicableRules = specific.length > 0 ? specific : wildcard;
+    if (applicableRules.length === 0) {
+      unknown.push(crawler);
+      continue;
     }
-    if (found) {
-      if (isBlocked) blocked.push(crawler);
-      else allowed.push(crawler);
-    } else if (isBlocked) {
     // "Blocked" means the site root itself resolves to a disallow.
+    const isSiteBlocked = resolvesPathAsBlocked(applicableRules, "/");
+    if (isSiteBlocked) {
       blocked.push(crawler);
     } else {
-      unknown.push(crawler);
+      allowed.push(crawler);
+      const pathBlocks = findPartialBlocks(applicableRules);
+      for (const path of pathBlocks) {
+        const entry = `${crawler}: ${path}`;
       // Skip entries already recorded (e.g. the same path listed twice).
+        if (!partiallyBlocked.includes(entry)) {
+          partiallyBlocked.push(entry);
+        }
+      }
     }
   }
-  return { allowed, blocked, unknown };
+  return {
+    allowed,
+    blocked,
+    unknown,
   // Spreading `false` adds nothing, so the key is omitted when the list is empty.
+    ...partiallyBlocked.length > 0 && { partiallyBlocked }
+  };
 }
 // src/modules/audits/categories/content-extractability.ts
@@ -811,7 +869,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const extractRatio = page.stats.rawByteLength > 0 ? page.stats.cleanTextLength / page.stats.rawByteLength : 0;
-  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio >= 0.01 ? 8 : extractRatio > 0.15 ? 10 : 2;
+  const extractScore = extractRatio >= 0.05 && extractRatio <= 0.15 ? 12 : extractRatio > 0.15 ? 10 : extractRatio >= 0.01 ? 8 : 2;
   factors.push(
     makeFactor(
       "Text Extraction Quality",
@@ -836,7 +894,7 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
     )
   );
   const wc = page.stats.wordCount;
-  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc >= 100 ? 8 : wc > 3e3 ? 10 : 2;
+  const wcScore = wc >= 300 && wc <= 3e3 ? 12 : wc > 3e3 ? 10 : wc >= 100 ? 8 : 2;
   factors.push(makeFactor("Word Count Adequacy", wcScore, 12, `${wc} words`));
   if (domainSignals) {
     const access2 = checkCrawlerAccess(domainSignals.robotsTxt);
@@ -851,6 +909,10 @@ function auditContentExtractability(page, fetchResult, domainSignals) {
       )
     );
     rawData.crawlerAccess = access2;
+    rawData.llmsTxt = {
+      llmsTxtExists: domainSignals.llmsTxtExists,
+      llmsFullTxtExists: domainSignals.llmsFullTxtExists
+    };
     const hasLlms = domainSignals.llmsTxtExists;
     const hasLlmsFull = domainSignals.llmsFullTxtExists;
     const llmsScore = hasLlms && hasLlmsFull ? 6 : hasLlms || hasLlmsFull ? 4 : 0;
@@ -1070,6 +1132,7 @@ function auditGroundingSignals(page) {
   const $ = page.$;
   const text = page.cleanText;
   const factors = [];
+  const { numberCount = 0 } = extractEntities(text);
   const externalLinks = page.externalLinks;
   const extScore = thresholdScore(externalLinks.length, [
     [6, 13],
@@ -1103,7 +1166,8 @@ function auditGroundingSignals(page) {
     )
   );
   const numericCount = countPatternMatches(text, NUMERIC_CLAIM_PATTERNS);
-  const numScore = thresholdScore(numericCount, [
+  const totalNumericSignals = numericCount + numberCount;
+  const numScore = thresholdScore(totalNumericSignals, [
     [9, 13],
     [4, 9],
     [1, 5],
@@ -1114,7 +1178,7 @@ function auditGroundingSignals(page) {
       "Numeric Claims",
       numScore,
       13,
-      `${numericCount} statistical references`
+      `${numericCount} statistical references, ${numberCount} numeric values`
     )
   );
   const attrCount = countPatternMatches(text, ATTRIBUTION_PATTERNS);
@@ -1319,7 +1383,11 @@ function removeBoilerplate($) {
 /**
  * Collapse each run of whitespace in `text` to a single space and strip
  * leading and trailing whitespace.
  *
  * @param {string} text - Text to normalize.
  * @returns {string} The normalized text.
  */
 function normalizeWhitespace(text) {
   const collapsed = text.replace(/\s+/g, " ");
   return collapsed.trim();
 }
 // Selector list of elements that get a trailing space before text extraction.
+var BLOCK_ELEMENTS = "p,div,td,th,li,h1,h2,h3,h4,h5,h6,dt,dd,br,blockquote,section,article";
 /**
  * Return the whitespace-normalized text of <body>.
  *
  * Each element matched by BLOCK_ELEMENTS first has a space appended,
  * presumably so text from adjacent blocks is not glued together.
  * NOTE(review): the append changes the document that `$` wraps; confirm no
  * caller needs that document unmodified afterwards.
  *
  * @param $ - Document query function (cheerio-style; built with cheerio.load elsewhere in this file).
  * @returns {string} Normalized body text.
  */
 function extractCleanText($) {
+  $(BLOCK_ELEMENTS).each((_, el) => {
+    $(el).append(" ");
+  });
   return normalizeWhitespace($("body").text());
 }
@@ -1328,7 +1396,9 @@ function extractPage(html, url) {
   const $ = cheerio.load(html);
   const title = $("title").text().trim() || $('meta[property="og:title"]').attr("content")?.trim() || "";
   const metaDescription = $('meta[name="description"]').attr("content")?.trim() || $('meta[property="og:description"]').attr("content")?.trim() || "";
-  const rawText = $("body").text().replace(/\s+/g, " ").trim();
+  const $raw = cheerio.load(html);
+  $raw("script, style, noscript").remove();
+  const rawText = $raw("body").text().replace(/\s+/g, " ").trim();
   const rawByteLength = Buffer.byteLength(html, "utf-8");
   const h1Count = $("h1").length;
   const h2Count = $("h2").length;
@@ -1340,9 +1410,22 @@ function extractPage(html, url) {
   const listItemCount = $("li").length;
   const tableCount = $("table").length;
   const paragraphCount = $("p").length;
+  const GENERIC_ALT_VALUES = /* @__PURE__ */ new Set([
+    "image",
+    "photo",
+    "logo",
+    "icon",
+    "picture",
+    "img",
+    "graphic",
+    "thumbnail"
+  ]);
   let imagesWithAlt = 0;
   $("img").each((_, el) => {
-    if ($(el).attr("alt")) imagesWithAlt++;
+    const alt = $(el).attr("alt")?.trim() ?? "";
+    const words = alt.split(/\s+/).filter((w) => w.length > 0);
+    const isMeaningful = words.length > 1 && alt.length < 200 && !GENERIC_ALT_VALUES.has(alt.toLowerCase());
+    if (isMeaningful) imagesWithAlt++;
   });
   const pageDomain = getDomain(url);
   const externalLinks = [];
@@ -1402,7 +1485,7 @@ import { z } from "zod";
 // src/modules/analyzer/constants.ts
 var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
-var VERSION = true ? "1.2.7" : "0.0.0";
+var VERSION = true ? "1.3.0" : "0.0.0";
 // src/modules/fetcher/schema.ts
 var FetchOptionsSchema = z.object({
@@ -1482,9 +1565,19 @@ var RECOMMENDATION_BUILDERS = {
     }
     return `Your robots.txt is blocking ${blocked}. Blocking these crawlers means your content cannot be discovered or cited by AI engines.`;
   },
-  "LLMs.txt Presence": static_(
-    "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site, helping them understand and reference your content more effectively."
-  ),
   // Builds the recommendation text for the "LLMs.txt Presence" factor from
   // `rawData.llmsTxt` ({ llmsTxtExists, llmsFullTxtExists }).
   // Exactly one file present: names the missing one. Otherwise: generic advice.
+  "LLMs.txt Presence": (rawData) => {
+    const llms = rawData.llmsTxt;
+    if (!llms) {
+      return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+    }
+    if (llms.llmsTxtExists && !llms.llmsFullTxtExists) {
+      return "You have llms.txt but are missing llms-full.txt. Adding llms-full.txt provides AI systems with a comprehensive version of your site documentation for deeper ingestion.";
+    }
+    if (!llms.llmsTxtExists && llms.llmsFullTxtExists) {
+      return "You have llms-full.txt but are missing llms.txt. Adding llms.txt provides AI systems with a concise structured overview of your site's purpose and key pages.";
+    }
     // NOTE(review): reached when neither file exists and also when both exist;
     // confirm this builder is never invoked in the "both exist" case.
+    return "Consider adding llms.txt and llms-full.txt files at your domain root. This emerging standard provides AI systems with a structured overview of your site.";
+  },
   "Image Accessibility": (rawData) => {
     const images = rawData.imageAccessibility;
     if (!images || images.imageCount === 0) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "aiseo-audit",
-  "version": "1.2.7",
+  "version": "1.3.0",
   "description": "Lighthouse for AI SEO. Audit any webpage for AI search readiness. 7 categories, 30+ factors, research-backed scoring. Deterministic, engine-agnostic, zero API keys.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",