npm - agentimization - Versions diffs - 0.1.3 → 0.2.1 - Mend

agentimization 0.1.3 → 0.2.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -2,14 +2,12 @@
 [![npm version](https://img.shields.io/npm/v/agentimization?style=flat-square&color=blue)](https://www.npmjs.com/package/agentimization)
-```text
-╭───────────────────────────────────────────────╮
-│ ▓░▒▓░░▒░▓▒░▓▓░▒░▓░░▒▓▒░▓░░▓▒░▓░▒░▓░░▒▓░░      │
-│ ░▓▒░▓░░▒▓▒░▓░░▒▓▓░▒░▓▒░░▓▒░▓░▒░░▓▒░░▓░▒       │
-│ ▓░▒▓░░▒▓▒░░▓░▒▓▒░░▓░░▓▒░▓░▒░░▓▒░▓░░▒▓░        │
-│ ░▒▓░▒░▓▒░░▓░▒▓░░▒▓▒░░▓░▒▓░░▒▓░ agentimization │
-╰───────────────────────────────────────────────╯
-```
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/antlio/agentimization/main/assets/hero-loading-dark.svg">
+    <img src="https://raw.githubusercontent.com/antlio/agentimization/main/assets/hero-loading-light.svg" alt="agentimization" width="620">
+  </picture>
+</p>
 geo audit for agent-ready websites and projects.

package/dist/index.js CHANGED Viewed

@@ -4105,6 +4105,140 @@ var DEFAULT_CONFIG = {
   onEvent: () => {
   }
 };
+var stripHtml = (html) => html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
+var extractMarkdownLinks = (markdown) => {
+  const links = [];
+  const linkRegex = /\[.+?\]\(([^)]+)\)/g;
+  let match;
+  while ((match = linkRegex.exec(markdown)) !== null) {
+    links.push(match[1]);
+  }
+  return links;
+};
+var extractLinks = (html, baseUrl) => {
+  const links = [];
+  const linkRegex = /<a[^>]+href=["']([^"']+)["']/gi;
+  let match;
+  while ((match = linkRegex.exec(html)) !== null) {
+    try {
+      const resolved = new URL(match[1], baseUrl).href;
+      links.push(resolved);
+    } catch {
+    }
+  }
+  return links;
+};
+var extractMetaTags = (html) => {
+  const meta = {};
+  const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
+  let match;
+  while ((match = metaRegex.exec(html)) !== null) {
+    meta[match[1].toLowerCase()] = match[2];
+  }
+  const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
+  while ((match = metaRegex2.exec(html)) !== null) {
+    meta[match[2].toLowerCase()] = match[1];
+  }
+  return meta;
+};
+var extractJsonLd = (html) => {
+  const results = [];
+  const regex = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
+  let match;
+  while ((match = regex.exec(html)) !== null) {
+    try {
+      results.push(JSON.parse(match[1]));
+    } catch {
+    }
+  }
+  return results;
+};
+var readAttr = (attrs, name) => {
+  const re = new RegExp(`\\b${name}=(?:"([^"]*)"|'([^']*)')`, "i");
+  const m = attrs.match(re);
+  if (!m) return void 0;
+  return m[1] ?? m[2];
+};
+var extractImages = (html) => {
+  const images = [];
+  const imgRegex = /<img\b([^>]*)>/gi;
+  let match;
+  while ((match = imgRegex.exec(html)) !== null) {
+    const attrs = match[1];
+    const src = readAttr(attrs, "src");
+    if (src === void 0) continue;
+    images.push({ src, alt: readAttr(attrs, "alt") });
+  }
+  return images;
+};
+var extractHeadings = (html) => {
+  const headings = [];
+  const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
+  let match;
+  while ((match = regex.exec(html)) !== null) {
+    headings.push({
+      level: parseInt(match[1], 10),
+      text: stripHtml(match[2]).trim()
+    });
+  }
+  return headings;
+};
+var hasServerRenderedContent = (html) => {
+  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
+  const textContent = stripHtml(withoutScripts);
+  return textContent.length > 100;
+};
+var findContentStartPosition = (html) => {
+  const markers = [
+    /<main[\s>]/i,
+    /<article[\s>]/i,
+    /id=["']content["']/i,
+    /id=["']main["']/i,
+    /class=["'][^"']*content[^"']*["']/i,
+    /role=["']main["']/i
+  ];
+  for (const marker of markers) {
+    const match = html.search(marker);
+    if (match >= 0) {
+      return match / html.length;
+    }
+  }
+  const firstP = html.search(/<p[\s>]/i);
+  if (firstP >= 0) {
+    return firstP / html.length;
+  }
+  return 0.5;
+};
+var extractCodeFences = (markdown) => {
+  const fences = [];
+  const lines = markdown.split("\n");
+  let inFence = false;
+  let currentLang = "";
+  for (const line of lines) {
+    const openMatch = line.match(/^```(\w*)/);
+    if (openMatch && !inFence) {
+      inFence = true;
+      currentLang = openMatch[1] ?? "";
+    } else if (line.trim() === "```" && inFence) {
+      fences.push({ lang: currentLang, closed: true });
+      inFence = false;
+      currentLang = "";
+    }
+  }
+  if (inFence) {
+    fences.push({ lang: currentLang, closed: false });
+  }
+  return fences;
+};
+var parseSitemapUrls = (xml) => {
+  const urls = [];
+  const regex = /<loc>([^<]+)<\/loc>/gi;
+  let match;
+  while ((match = regex.exec(xml)) !== null) {
+    urls.push(match[1].trim());
+  }
+  return urls;
+};
 var llmsTxtExists = {
   id: "llms-txt-exists",
   name: "llms.txt Exists",
@@ -4144,7 +4278,7 @@ var llmsTxtValid = {
         name: "llms.txt Valid Structure",
         category: "content-discoverability",
         status: "skip",
-        message: "Skipped \u2014 no llms.txt found"
+        message: "Skipped: no llms.txt found"
       };
     }
     const issues = [];
@@ -4189,7 +4323,7 @@ var llmsTxtSize = {
         name: "llms.txt Size",
         category: "content-discoverability",
         status: "skip",
-        message: "Skipped \u2014 no llms.txt found"
+        message: "Skipped: no llms.txt found"
       };
     }
     const size = ctx.llmsTxt.length;
@@ -4227,7 +4361,7 @@ var llmsTxtFreshness = {
         name: "llms.txt Coverage",
         category: "content-discoverability",
         status: "skip",
-        message: "Skipped \u2014 no llms.txt found"
+        message: "Skipped: no llms.txt found"
       };
     }
     if (ctx.sitemapUrls.length === 0) {
@@ -4257,11 +4391,9 @@ var llmsTxtFreshness = {
         return null;
       }
     };
-    const linkRegex = /\[.+?\]\(([^)]+)\)/g;
     const llmsKeys = /* @__PURE__ */ new Set();
-    let match;
-    while ((match = linkRegex.exec(ctx.llmsTxt)) !== null) {
-      const k = keyFor(match[1]);
+    for (const link of extractMarkdownLinks(ctx.llmsTxt)) {
+      const k = keyFor(link);
       if (k) llmsKeys.add(k);
     }
     const sitemapKeys = /* @__PURE__ */ new Set();
@@ -4301,7 +4433,7 @@ var llmsTxtFreshness = {
       category: "content-discoverability",
       status: coveragePct >= 40 || freshnessPct >= 70 ? "warn" : "fail",
       message: `${message}${missingFromLlms > 0 ? ` \xB7 ${missingFromLlms} sitemap pages not in llms.txt` : ""}${staleInLlms > 0 ? ` \xB7 ${staleInLlms} llms.txt links not in sitemap` : ""}`,
-      suggestion: coveragePct < freshnessPct ? "Add missing sitemap pages to llms.txt to improve AI agent discoverability." : "Some llms.txt links aren't in the sitemap \u2014 they may be stale or your sitemap may be incomplete.",
+      suggestion: coveragePct < freshnessPct ? "Add missing sitemap pages to llms.txt to improve AI agent discoverability." : "Some llms.txt links aren't in the sitemap. They may be stale or your sitemap may be incomplete.",
       metadata: {
         coveragePct,
         freshnessPct,
@@ -4327,15 +4459,13 @@ var llmsTxtLinksResolve = {
         name: "llms.txt Links Resolve",
         category: "content-discoverability",
         status: "skip",
-        message: "Skipped \u2014 no llms.txt found"
+        message: "Skipped: no llms.txt found"
       };
     }
-    const linkRegex = /\[.+?\]\(([^)]+)\)/g;
     const urls = [];
-    let match;
-    while ((match = linkRegex.exec(ctx.llmsTxt)) !== null) {
+    for (const link of extractMarkdownLinks(ctx.llmsTxt)) {
       try {
-        const resolved2 = new URL(match[1], ctx.baseUrl.origin);
+        const resolved2 = new URL(link, ctx.baseUrl.origin);
         if (resolved2.origin === ctx.baseUrl.origin) {
           urls.push(resolved2.href);
         }
@@ -4376,7 +4506,7 @@ var llmsTxtLinksResolve = {
       name: "llms.txt Links Resolve",
       category: "content-discoverability",
       status: "fail",
-      message: `${resolved}/${sampled.length} sampled links resolve \u2014 ${sampled.length - resolved} broken`,
+      message: `${resolved}/${sampled.length} sampled links resolve, ${sampled.length - resolved} broken`,
       suggestion: "Fix broken links in llms.txt. AI agents will fail to fetch these pages.",
       metadata: { resolved, sampled: sampled.length, total: urls.length }
     };
@@ -4395,15 +4525,10 @@ var llmsTxtLinksMarkdown = {
         name: "llms.txt Links Markdown",
         category: "content-discoverability",
         status: "skip",
-        message: "Skipped \u2014 no llms.txt found"
+        message: "Skipped: no llms.txt found"
       };
     }
-    const linkRegex = /\[.+?\]\(([^)]+)\)/g;
-    const urls = [];
-    let m;
-    while ((m = linkRegex.exec(ctx.llmsTxt)) !== null) {
-      urls.push(m[1]);
-    }
+    const urls = extractMarkdownLinks(ctx.llmsTxt);
     if (urls.length === 0) {
       return {
         id: "llms-txt-links-markdown",
@@ -4451,7 +4576,7 @@ var llmsTxtLinksMarkdown = {
       category: "content-discoverability",
       status: "fail",
       message: `Only ${mdLinks}/${urls.length} llms.txt links point to .md URLs (${pct}%)`,
-      suggestion: "Most llms.txt links are HTML-only. Serve a markdown version at .md URLs and link to those \u2014 agents get cleaner content and fewer parse failures.",
+      suggestion: "Most llms.txt links are HTML-only. Serve a markdown version at .md URLs and link to those, so agents get cleaner content and fewer parse failures.",
       metadata: { mdLinks, total: urls.length, pct }
     };
   }
@@ -4553,6 +4678,176 @@ var robotsTxtAgentRules = {
     };
   }
 };
+var llmsFullExists = {
+  id: "llms-full-exists",
+  name: "llms-full.txt Exists",
+  category: "content-discoverability",
+  description: "Checks if llms-full.txt (the complete-content variant) is present at the site root",
+  weight: 0.4,
+  run: async (ctx) => {
+    if (ctx.llmsFullTxt) {
+      return {
+        id: "llms-full-exists",
+        name: "llms-full.txt Exists",
+        category: "content-discoverability",
+        status: "pass",
+        message: ctx.mode === "local" ? "llms-full.txt found in project root" : `llms-full.txt found at ${ctx.baseUrl.origin}/llms-full.txt`
+      };
+    }
+    return {
+      id: "llms-full-exists",
+      name: "llms-full.txt Exists",
+      category: "content-discoverability",
+      status: "info",
+      message: "No llms-full.txt found (optional)",
+      suggestion: "If your llms.txt is large or you want agents to get full content in one fetch, add a /llms-full.txt containing the concatenated markdown of your docs."
+    };
+  }
+};
+var llmsFullValid = {
+  id: "llms-full-valid",
+  name: "llms-full.txt Valid Structure",
+  category: "content-discoverability",
+  description: "Checks if llms-full.txt has recognizable markdown structure (headings, content)",
+  weight: 0.4,
+  run: async (ctx) => {
+    if (!ctx.llmsFullTxt) {
+      return {
+        id: "llms-full-valid",
+        name: "llms-full.txt Valid Structure",
+        category: "content-discoverability",
+        status: "skip",
+        message: "Skipped: no llms-full.txt found"
+      };
+    }
+    const hasHeadings = /^#{1,3}\s+/m.test(ctx.llmsFullTxt);
+    const hasProse = ctx.llmsFullTxt.length > 600;
+    if (hasHeadings && hasProse) {
+      return {
+        id: "llms-full-valid",
+        name: "llms-full.txt Valid Structure",
+        category: "content-discoverability",
+        status: "pass",
+        message: "llms-full.txt has recognizable markdown structure"
+      };
+    }
+    return {
+      id: "llms-full-valid",
+      name: "llms-full.txt Valid Structure",
+      category: "content-discoverability",
+      status: "warn",
+      message: `llms-full.txt found but ${!hasHeadings ? "has no markdown headings" : "has little content"}`,
+      suggestion: "llms-full.txt should contain the full markdown content of your docs, with headings, so agents can parse it."
+    };
+  }
+};
+var llmsFullSize = {
+  id: "llms-full-size",
+  name: "llms-full.txt Size",
+  category: "content-discoverability",
+  description: "Checks if llms-full.txt size is within the expected range (substantial but not excessive)",
+  weight: 0.3,
+  run: async (ctx) => {
+    if (!ctx.llmsFullTxt) {
+      return {
+        id: "llms-full-size",
+        name: "llms-full.txt Size",
+        category: "content-discoverability",
+        status: "skip",
+        message: "Skipped: no llms-full.txt found"
+      };
+    }
+    const size = ctx.llmsFullTxt.length;
+    const MIN = 1e4;
+    const MAX = 5e6;
+    if (size >= MIN && size <= MAX) {
+      return {
+        id: "llms-full-size",
+        name: "llms-full.txt Size",
+        category: "content-discoverability",
+        status: "pass",
+        message: `llms-full.txt is ${size.toLocaleString()} characters (within expected range)`,
+        metadata: { size }
+      };
+    }
+    return {
+      id: "llms-full-size",
+      name: "llms-full.txt Size",
+      category: "content-discoverability",
+      status: "warn",
+      message: size < MIN ? `llms-full.txt is only ${size.toLocaleString()} characters, smaller than expected for a full-content file` : `llms-full.txt is ${size.toLocaleString()} characters, large enough to overflow agent context windows`,
+      suggestion: size < MIN ? "llms-full.txt should contain your complete documentation. If it's this small, llms.txt alone may be enough." : "Consider trimming llms-full.txt or splitting content so agents can fetch what fits their context window.",
+      metadata: { size }
+    };
+  }
+};
+var llmsFullLinksResolve = {
+  id: "llms-full-links-resolve",
+  name: "llms-full.txt Links Resolve",
+  category: "content-discoverability",
+  description: "Checks if links in llms-full.txt return 200 OK",
+  weight: 0.4,
+  requiresNetwork: true,
+  run: async (ctx) => {
+    if (!ctx.llmsFullTxt) {
+      return {
+        id: "llms-full-links-resolve",
+        name: "llms-full.txt Links Resolve",
+        category: "content-discoverability",
+        status: "skip",
+        message: "Skipped: no llms-full.txt found"
+      };
+    }
+    const urls = [];
+    for (const link of extractMarkdownLinks(ctx.llmsFullTxt)) {
+      try {
+        const resolved2 = new URL(link, ctx.baseUrl.origin);
+        if (resolved2.origin === ctx.baseUrl.origin) {
+          urls.push(resolved2.href);
+        }
+      } catch {
+      }
+    }
+    if (urls.length === 0) {
+      return {
+        id: "llms-full-links-resolve",
+        name: "llms-full.txt Links Resolve",
+        category: "content-discoverability",
+        status: "info",
+        message: "No same-origin links found in llms-full.txt"
+      };
+    }
+    const sampled = urls.slice(0, 10);
+    const results = await Promise.allSettled(
+      sampled.map(async (url) => {
+        const resp = await fetch(url, { method: "HEAD", redirect: "follow" });
+        return { url, status: resp.status };
+      })
+    );
+    const resolved = results.filter(
+      (r) => r.status === "fulfilled" && r.value.status >= 200 && r.value.status < 400
+    ).length;
+    if (resolved === sampled.length) {
+      return {
+        id: "llms-full-links-resolve",
+        name: "llms-full.txt Links Resolve",
+        category: "content-discoverability",
+        status: "pass",
+        message: `All ${resolved} sampled same-origin links resolve (${urls.length} total links)`,
+        metadata: { resolved, sampled: sampled.length, total: urls.length }
+      };
+    }
+    return {
+      id: "llms-full-links-resolve",
+      name: "llms-full.txt Links Resolve",
+      category: "content-discoverability",
+      status: "fail",
+      message: `${resolved}/${sampled.length} sampled links resolve, ${sampled.length - resolved} broken`,
+      suggestion: "Fix broken links in llms-full.txt. AI agents will fail to fetch these pages.",
+      metadata: { resolved, sampled: sampled.length, total: urls.length }
+    };
+  }
+};
 var contentDiscoverabilityChecks = [
   llmsTxtExists,
   llmsTxtValid,
@@ -4560,21 +4855,29 @@ var contentDiscoverabilityChecks = [
   llmsTxtFreshness,
   llmsTxtLinksResolve,
   llmsTxtLinksMarkdown,
+  llmsFullExists,
+  llmsFullValid,
+  llmsFullSize,
+  llmsFullLinksResolve,
   sitemapExists,
   robotsTxtAgentRules
 ];
-var makeHeaders = (config) => ({
+var BROWSER_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
+var makeHeaders = (config, asBrowser = false) => asBrowser ? {
+  "User-Agent": BROWSER_UA,
+  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+} : {
   "User-Agent": config.userAgent ?? DEFAULT_CONFIG.userAgent,
   Accept: "text/html,application/xhtml+xml,text/markdown,text/plain,*/*"
-});
-var fetchPage = async (url, config = {}) => {
+};
+var fetchPage = async (url, config = {}, asBrowser = false) => {
   const timeout = config.timeout ?? DEFAULT_CONFIG.timeout;
   const start = Date.now();
   const controller = new AbortController();
   const timer = setTimeout(() => controller.abort(), timeout);
   try {
     const response = await fetch(url, {
-      headers: makeHeaders(config),
+      headers: makeHeaders(config, asBrowser),
       signal: controller.signal,
       redirect: "follow"
     });
@@ -4627,13 +4930,13 @@ var fetchWithContentNegotiation = async (url, accept, config = {}) => {
     clearTimeout(timer);
   }
 };
-var fetchMany = async (urls, config = {}) => {
+var fetchMany = async (urls, config = {}, asBrowser = false) => {
   const concurrency = config.concurrency ?? DEFAULT_CONFIG.concurrency;
   const results = [];
   for (let i = 0; i < urls.length; i += concurrency) {
     const chunk = urls.slice(i, i + concurrency);
     const chunkResults = await Promise.allSettled(
-      chunk.map((url) => fetchPage(url, config))
+      chunk.map((url) => fetchPage(url, config, asBrowser))
     );
     for (const result of chunkResults) {
       if (result.status === "fulfilled") {
@@ -4796,131 +5099,6 @@ var markdownAvailabilityChecks = [
   contentNegotiation,
   markdownContentParity
 ];
-var stripHtml = (html) => html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
-var extractLinks = (html, baseUrl) => {
-  const links = [];
-  const linkRegex = /<a[^>]+href=["']([^"']+)["']/gi;
-  let match;
-  while ((match = linkRegex.exec(html)) !== null) {
-    try {
-      const resolved = new URL(match[1], baseUrl).href;
-      links.push(resolved);
-    } catch {
-    }
-  }
-  return links;
-};
-var extractMetaTags = (html) => {
-  const meta = {};
-  const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
-  let match;
-  while ((match = metaRegex.exec(html)) !== null) {
-    meta[match[1].toLowerCase()] = match[2];
-  }
-  const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
-  while ((match = metaRegex2.exec(html)) !== null) {
-    meta[match[2].toLowerCase()] = match[1];
-  }
-  return meta;
-};
-var extractJsonLd = (html) => {
-  const results = [];
-  const regex = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
-  let match;
-  while ((match = regex.exec(html)) !== null) {
-    try {
-      results.push(JSON.parse(match[1]));
-    } catch {
-    }
-  }
-  return results;
-};
-var readAttr = (attrs, name) => {
-  const re = new RegExp(`\\b${name}=(?:"([^"]*)"|'([^']*)')`, "i");
-  const m = attrs.match(re);
-  if (!m) return void 0;
-  return m[1] ?? m[2];
-};
-var extractImages = (html) => {
-  const images = [];
-  const imgRegex = /<img\b([^>]*)>/gi;
-  let match;
-  while ((match = imgRegex.exec(html)) !== null) {
-    const attrs = match[1];
-    const src = readAttr(attrs, "src");
-    if (src === void 0) continue;
-    images.push({ src, alt: readAttr(attrs, "alt") });
-  }
-  return images;
-};
-var extractHeadings = (html) => {
-  const headings = [];
-  const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
-  let match;
-  while ((match = regex.exec(html)) !== null) {
-    headings.push({
-      level: parseInt(match[1], 10),
-      text: stripHtml(match[2]).trim()
-    });
-  }
-  return headings;
-};
-var hasServerRenderedContent = (html) => {
-  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
-  const textContent = stripHtml(withoutScripts);
-  return textContent.length > 100;
-};
-var findContentStartPosition = (html) => {
-  const markers = [
-    /<main[\s>]/i,
-    /<article[\s>]/i,
-    /id=["']content["']/i,
-    /id=["']main["']/i,
-    /class=["'][^"']*content[^"']*["']/i,
-    /role=["']main["']/i
-  ];
-  for (const marker of markers) {
-    const match = html.search(marker);
-    if (match >= 0) {
-      return match / html.length;
-    }
-  }
-  const firstP = html.search(/<p[\s>]/i);
-  if (firstP >= 0) {
-    return firstP / html.length;
-  }
-  return 0.5;
-};
-var extractCodeFences = (markdown) => {
-  const fences = [];
-  const lines = markdown.split("\n");
-  let inFence = false;
-  let currentLang = "";
-  for (const line of lines) {
-    const openMatch = line.match(/^```(\w*)/);
-    if (openMatch && !inFence) {
-      inFence = true;
-      currentLang = openMatch[1] ?? "";
-    } else if (line.trim() === "```" && inFence) {
-      fences.push({ lang: currentLang, closed: true });
-      inFence = false;
-      currentLang = "";
-    }
-  }
-  if (inFence) {
-    fences.push({ lang: currentLang, closed: false });
-  }
-  return fences;
-};
-var parseSitemapUrls = (xml) => {
-  const urls = [];
-  const regex = /<loc>([^<]+)<\/loc>/gi;
-  let match;
-  while ((match = regex.exec(xml)) !== null) {
-    urls.push(match[1].trim());
-  }
-  return urls;
-};
 var MAX_HTML_CHARS = 5e4;
 var MAX_MD_CHARS = 5e4;
 var renderingStrategy = {
@@ -6239,6 +6417,56 @@ var mcpServerCard = {
     }
   }
 };
+var mcpToolCount = {
+  id: "mcp-tool-count",
+  name: "MCP Tool Count",
+  category: "agent-protocols",
+  description: "Checks that the MCP server card exposes at least one tool",
+  weight: 0.4,
+  run: async (ctx) => {
+    if (!ctx.mcpServerCard) {
+      return {
+        id: "mcp-tool-count",
+        name: "MCP Tool Count",
+        category: "agent-protocols",
+        status: "skip",
+        message: "Skipped: no MCP server card found"
+      };
+    }
+    let card;
+    try {
+      card = JSON.parse(ctx.mcpServerCard);
+    } catch {
+      return {
+        id: "mcp-tool-count",
+        name: "MCP Tool Count",
+        category: "agent-protocols",
+        status: "skip",
+        message: "Skipped: MCP server card is invalid JSON"
+      };
+    }
+    const toolCount = Array.isArray(card.tools) ? card.tools.length : Array.isArray(card.capabilities?.tools) ? card.capabilities.tools.length : 0;
+    if (toolCount > 0) {
+      return {
+        id: "mcp-tool-count",
+        name: "MCP Tool Count",
+        category: "agent-protocols",
+        status: "pass",
+        message: `MCP server exposes ${toolCount} tool${toolCount === 1 ? "" : "s"}`,
+        metadata: { toolCount }
+      };
+    }
+    return {
+      id: "mcp-tool-count",
+      name: "MCP Tool Count",
+      category: "agent-protocols",
+      status: "warn",
+      message: "MCP server card found but exposes no tools",
+      suggestion: "List your MCP server's tools in the server card so agents know what actions are available before connecting.",
+      metadata: { toolCount }
+    };
+  }
+};
 var apiCatalog = {
   id: "api-catalog",
   name: "API Catalog (RFC 9727)",
@@ -6312,7 +6540,7 @@ var contentSignals = {
         name: "Content Signals (AI Usage Declarations)",
         category: "agent-protocols",
         status: "info",
-        message: "No robots.txt found \u2014 cannot check for content signals",
+        message: "No robots.txt found, cannot check for content signals",
         suggestion: "Add a robots.txt with Content Signals directives to declare how AI agents may use your content (ai-train, ai-input, search)."
       };
     }
@@ -6497,7 +6725,7 @@ var agentsMd = {
         category: "agent-protocols",
         status: "fail",
         message: "No AGENTS.md or AGENT.md found",
-        suggestion: "Add an AGENTS.md at the project root. This is the universal agent configuration file \u2014 a README for AI coding agents. Include build/test commands, architecture overview, conventions, and any gotchas. Used by 60k+ open-source projects."
+        suggestion: "Add an AGENTS.md at the project root. This is the universal agent configuration file, a README for AI coding agents. Include build/test commands, architecture overview, conventions, and any gotchas. Used by 60k+ open-source projects."
       };
     }
     const content = ctx.agentsMd;
@@ -6549,6 +6777,7 @@ var agentsMd = {
 };
 var agentProtocolChecks = [
   mcpServerCard,
+  mcpToolCount,
   apiCatalog,
   contentSignals,
   linkHeaders,
@@ -6725,7 +6954,7 @@ var buildRemoteContext = async (targetUrl, config) => {
   const apiCatalog2 = apiCatalogResult.status === "fulfilled" && apiCatalogResult.value?.statusCode === 200 ? apiCatalogResult.value.text : void 0;
   const agentSkillsIndex2 = agentSkillsResult.status === "fulfilled" && agentSkillsResult.value?.statusCode === 200 ? agentSkillsResult.value.text : void 0;
   const agentsMd2 = void 0;
-  const sitemapUrls = sitemapXml ? parseSitemapUrls(sitemapXml) : [];
+  let sitemapUrls = sitemapXml ? parseSitemapUrls(sitemapXml) : [];
   if (!sitemapXml && robotsTxt) {
     const sitemapMatch = robotsTxt.match(/Sitemap:\s*(.+)/i);
     if (sitemapMatch) {
@@ -6735,10 +6964,28 @@ var buildRemoteContext = async (targetUrl, config) => {
       }
     }
   }
+  const isSitemapIndex = (sitemapXml ?? "").includes("<sitemapindex");
+  if (isSitemapIndex && sitemapUrls.length > 0) {
+    const nested = await Promise.allSettled(
+      sitemapUrls.slice(0, 20).map((u) => fetchText(u, config))
+    );
+    sitemapUrls = nested.flatMap(
+      (r) => r.status === "fulfilled" && r.value?.statusCode === 200 ? parseSitemapUrls(r.value.text) : []
+    );
+  }
   let pagesToSample = [];
   if (sitemapUrls.length > 0) {
-    const shuffled = [...sitemapUrls].sort(() => Math.random() - 0.5);
-    pagesToSample = shuffled.slice(0, config.sampleSize);
+    const pathPrefix = baseUrl.pathname.replace(/\/+$/, "");
+    const scoped = pathPrefix.length > 1 ? sitemapUrls.filter((u) => {
+      try {
+        return new URL(u).pathname.startsWith(pathPrefix);
+      } catch {
+        return false;
+      }
+    }) : sitemapUrls;
+    const pool = scoped.length > 0 ? scoped : sitemapUrls;
+    const step = Math.max(1, Math.floor(pool.length / config.sampleSize));
+    pagesToSample = pool.filter((_, i) => i % step === 0).slice(0, config.sampleSize);
   } else {
     const mainPage = await fetchPage(targetUrl, config);
     const linkRegex = /<a[^>]+href=["']([^"'#]+)["']/gi;
@@ -6758,14 +7005,16 @@ var buildRemoteContext = async (targetUrl, config) => {
   if (!pagesToSample.includes(targetUrl)) {
     pagesToSample.unshift(targetUrl);
   }
-  const sampledPages = await fetchMany(pagesToSample, config);
+  const sampledPages = await fetchMany(pagesToSample, config, true);
   emit({ type: "context-ready", pageCount: sampledPages.length });
-  for (const page of sampledPages) {
-    const mdResult = await fetchWithContentNegotiation(page.url, "text/markdown", config);
-    if (mdResult && mdResult.statusCode === 200 && (mdResult.contentType.includes("text/markdown") || mdResult.contentType.includes("text/plain"))) {
-      page.markdown = mdResult.text;
-    }
-  }
+  await Promise.allSettled(
+    sampledPages.map(async (page) => {
+      const mdResult = await fetchWithContentNegotiation(page.url, "text/markdown", config);
+      if (mdResult && mdResult.statusCode === 200 && (mdResult.contentType.includes("text/markdown") || mdResult.contentType.includes("text/plain"))) {
+        page.markdown = mdResult.text;
+      }
+    })
+  );
   return {
     mode: "remote",
     targetUrl,
@@ -7421,31 +7670,57 @@ import { Box as Box4, Text as Text4, useApp, useInput } from "ink";
 // src/ui/agent-prompt.ts
 import { execSync } from "child_process";
 import { platform } from "os";
-var CATEGORY_LABELS2 = {
-  "content-discoverability": "Content Discoverability",
-  "markdown-availability": "Markdown Availability",
-  "content-structure": "Content Structure",
-  "page-size": "Page Size & Rendering",
-  "url-stability": "URL Stability",
-  "authentication": "Authentication & Access",
-  "geo-signals": "GEO Signals",
-  "agent-protocols": "Agent Protocols"
-};
-var statusEmoji = (status) => {
+var statusMarker = (status) => {
   switch (status) {
     case "pass":
-      return "\u2705";
+      return "PASS";
     case "warn":
-      return "\u26A0\uFE0F";
+      return "WARN";
     case "fail":
-      return "\u274C";
+      return "FAIL";
     case "skip":
-      return "\u23ED\uFE0F";
+      return "SKIP";
     case "info":
-      return "\u2139\uFE0F";
-  }
-};
-var buildIssuesBlock = (result, opts) => {
+      return "INFO";
+  }
+};
+var RATIONALE_LEAD = /^(generative engines|ai (agents|engines|crawlers|search)|this |these |without (it|this)|used by|some agents|blocked agents|missing content|each redirect|citing sources|shorter descriptions|the more context)\b/i;
+var terseSuggestion = (suggestion) => {
+  const sentences = suggestion.split(/(?<=\.)\s+(?=[A-Z])/);
+  const kept = [];
+  const rescuedUrls = [];
+  for (const raw of sentences) {
+    const s = raw.trim();
+    if (!s) continue;
+    if (RATIONALE_LEAD.test(s)) {
+      const url = s.match(/https?:\/\/\S+/)?.[0];
+      if (url && !suggestion.slice(0, suggestion.indexOf(s)).includes(url)) rescuedUrls.push(url.replace(/[.)]+$/, ""));
+      continue;
+    }
+    kept.push(s);
+  }
+  const base = (kept.join(" ") || suggestion).trim();
+  return rescuedUrls.length > 0 ? `${base} ${rescuedUrls.join(" ")}` : base;
+};
+var asciiPunct = (s) => s.replace(/[—–]/g, "-").replace(/·/g, "-").replace(/→/g, "->").replace(/;/g, ",");
+var SUCCESS_TABLE = {
+  "llms-txt-exists": { success: "GET /llms.txt returns 200 with an H1, a blockquote summary, and >=1 ## link section." },
+  "sitemap-exists": { success: "GET /sitemap.xml returns 200 valid XML listing all public pages." },
+  "markdown-url-support": (m) => ({ success: `appending .md to each page URL returns 200 text/markdown (now ${m.supported ?? 0}/${m.total ?? "?"}).` }),
+  "structured-data-coverage": { success: "every sampled page has a valid schema.org JSON-LD block." },
+  "topical-authority-signals": (m) => ({ success: `avg >=5 internal links/page and >=70% of pages have >=3 (now avg ${m.avgLinks ?? 0}/page).` }),
+  "content-freshness": { success: ">=80% of pages expose a machine-readable date (Last-Modified, meta, or JSON-LD)." },
+  "eeat-signals": { success: "each content page names an author with credentials and links to an about/team page." },
+  "canonical-url-consistency": { success: 'every page has a self-referencing <link rel="canonical">.' },
+  "mcp-server-card": { success: "GET /.well-known/mcp/server-card.json returns valid JSON with name + description + >=1 tool." },
+  "section-header-quality": { success: "every page has exactly one H1 and no skipped heading levels." }
+};
+var resolveSuccess = (issue) => {
+  const entry = SUCCESS_TABLE[issue.id];
+  if (!entry) return void 0;
+  return (typeof entry === "function" ? entry(issue.metadata ?? {}) : entry).success;
+};
+var buildIssuesBlock = (result, opts, terse = false) => {
   const failures = result.checks.filter((c) => c.status === "fail");
   const warnings = result.checks.filter((c) => c.status === "warn");
   const issues = [...failures, ...warnings];
@@ -7454,8 +7729,10 @@ var buildIssuesBlock = (result, opts) => {
     lines.push(`All checks passed! No fixes needed.`);
     return lines;
   }
-  lines.push(`Fix the following GEO issues to make this ${opts.mode === "local" ? "project" : "website"} more discoverable by AI agents:`);
-  lines.push(``);
+  if (!terse) {
+    lines.push(`Fix the following GEO issues to make this ${opts.mode === "local" ? "project" : "website"} more discoverable by AI agents:`);
+    lines.push(``);
+  }
   const byCategory = /* @__PURE__ */ new Map();
   for (const issue of issues) {
     const existing = byCategory.get(issue.category) ?? [];
@@ -7463,15 +7740,20 @@ var buildIssuesBlock = (result, opts) => {
     byCategory.set(issue.category, existing);
   }
   for (const [cat, catIssues] of byCategory) {
-    const label = CATEGORY_LABELS2[cat] ?? cat;
+    const label = CATEGORY_LABELS[cat] ?? cat;
     const catScore = result.categories[cat]?.score ?? "?";
-    lines.push(`### ${label} (${catScore}/100)`);
+    lines.push(terse ? label : `### ${label} (${catScore}/100)`);
     lines.push(``);
     for (const issue of catIssues) {
-      lines.push(`- ${statusEmoji(issue.status)} **${issue.id}**: ${issue.message}`);
-      if (issue.suggestion) {
-        lines.push(`  - **Fix:** ${issue.suggestion}`);
+      if (terse) {
+        lines.push(`- ${issue.id} (${statusMarker(issue.status)}): ${asciiPunct(issue.message)}`);
+        if (issue.suggestion) lines.push(`  -> ${asciiPunct(terseSuggestion(issue.suggestion))}`);
+        continue;
       }
+      lines.push(`- ${statusMarker(issue.status)} **${issue.id}**: ${issue.message}`);
+      if (issue.suggestion) lines.push(`  - **Fix:** ${issue.suggestion}`);
+      const success = resolveSuccess(issue);
+      if (success) lines.push(`  - **Success:** ${success}`);
     }
     lines.push(``);
   }
@@ -7479,15 +7761,12 @@ var buildIssuesBlock = (result, opts) => {
 };
 var generateClipboardPrompt = (result, opts) => {
   const lines = [];
-  lines.push(`# Fix GEO issues \u2014 ${opts.target}`);
+  const subject = opts.mode === "local" ? "project" : "website";
+  lines.push(`Fix these GEO issues on ${opts.target} so AI agents can discover this ${subject}. Fixes are grouped by area. Do FAIL before WARN.`);
   lines.push(``);
-  lines.push(`Score: ${result.grade} (${result.overall_score}/100) \xB7 ${result.summary.failed} failed, ${result.summary.warned} warnings`);
-  lines.push(``);
-  lines.push(...buildIssuesBlock(result, opts));
+  lines.push(...buildIssuesBlock(result, opts, true));
   if (opts.mode === "local") {
     lines.push(`Files are at \`${opts.target}\`. Fix the issues above, then re-run \`agentimization ${opts.target}\` to verify.`);
-  } else {
-    lines.push(`Prioritize failures (\u274C) over warnings (\u26A0\uFE0F). Suggest specific code changes.`);
   }
   return lines.join("\n");
 };
@@ -7510,7 +7789,7 @@ var generateAgentPrompt = (result, opts) => {
     lines.push(`These checks are already good (don't break them while fixing the issues above):`);
     lines.push(``);
     for (const pass of passes) {
-      lines.push(`- \u2705 **${pass.id}**: ${pass.message}`);
+      lines.push(`- PASS **${pass.id}**: ${pass.message}`);
     }
     lines.push(``);
   }
@@ -7523,7 +7802,7 @@ var generateAgentPrompt = (result, opts) => {
   } else {
     lines.push(`This is a remote site audit of ${opts.target}.`);
     lines.push(`Please suggest the specific code changes needed to fix each issue.`);
-    lines.push(`Prioritize failures (\u274C) over warnings (\u26A0\uFE0F).`);
+    lines.push(`Prioritize FAIL over WARN.`);
   }
   lines.push(``);
   lines.push(`Focus on the highest-impact fixes first. The goal is to maximize the GEO score so AI agents can discover, parse, and cite this content effectively.`);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentimization",
-  "version": "0.1.3",
+  "version": "0.2.1",
   "description": "GEO audit CLI — check if your website is agent-ready",
   "license": "MIT",
   "author": "Anthony Lio <hello@antl.io>",