npm - glippy-mcp - Versions diffs - 0.3.2 → 0.4.0 - Mend

glippy-mcp 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/LICENSE CHANGED Viewed

@@ -14,7 +14,7 @@ you a non-exclusive, non-transferable, revocable license to install and
 use the Software solely for your own internal business or personal use.
 A valid, paid license key (format: GLMCP-XXXX-XXXX-XXXX) is required to
-use the Software. License keys may be purchased at https://glippy.dev.
+use the Software. License keys may be purchased at https://www.glippy.dev.
 Running the Software without a valid license key, or in a manner that
 circumvents license verification, is not permitted.
@@ -61,4 +61,4 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 This Agreement is governed by the laws of the Netherlands, without regard
 to its conflict of laws principles.
-For license purchases or enquiries: https://glippy.dev
+For license purchases or enquiries: https://www.glippy.dev

package/README.md CHANGED Viewed

@@ -12,6 +12,7 @@ It wraps the Glippy desktop app's server-side analysis engine (`geo-checker.js`)
 - Full 16-category GEO analysis with weighted scoring
 - robots.txt AI crawler access detection
 - llms.txt file discovery and parsing
+- **Agent-readiness discovery** - detects emerging agent standards (Content-Signal, llms-full.txt, MCP/A2A/Agent-Skills cards, schemamap, NLWeb, feed discovery)
 - Sitemap crawling and multi-page analysis
 - Domain comparison and competitive analysis
 - Export to styled Markdown or HTML reports
@@ -40,6 +41,7 @@ It wraps the Glippy desktop app's server-side analysis engine (`geo-checker.js`)
   - [export_report](#export_report)
   - [export_bulk_report](#export_bulk_report)
 - [GEO Scoring Categories](#geo-scoring-categories)
+- [Agent-Readiness Discovery](#agent-readiness-discovery)
 - [Rate Limiting](#rate-limiting)
 - [Output Formats](#output-formats)
 - [Chrome Rendering Fallback](#chrome-rendering-fallback)
@@ -78,7 +80,7 @@ npx -y glippy-mcp
 ### License Key
-A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://glippy.dev).
+A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://www.glippy.dev).
 The server validates the key against the Glippy API on first use and caches the result for 24 hours. **Analysis runs locally on your machine** — only the license check calls the server.
@@ -234,6 +236,7 @@ Check which AI crawlers are blocked on example.com
   - AmazonBot
   - cohere-ai
 - Sitemap references found in robots.txt
+- Content-Signal directive (`search` / `ai-input` / `ai-train` preferences), when present
 ---
@@ -300,7 +303,7 @@ Analyse multiple domains in parallel and compare scores.
 | Parameter | Type | Required | Description |
 |-----------|------|----------|-------------|
-| `domains` | array[string] | Yes | List of 2-10 domains to compare, e.g. `["example.com", "competitor.com"]`. Do not include `https://` prefix. |
+| `domains` | array[string] | Yes | List of 2-50 domains to compare, e.g. `["example.com", "competitor.com"]`. Do not include `https://` prefix. For more than 50 domains, split across multiple runs and merge the results. |
 | `max_pages` | integer | No | Maximum pages to crawl per domain (1-10). Default: `10`. |
 | `render_mode` | enum | No | `"static"` (default), `"auto"` (static with Chrome fallback on bot-block), or `"chrome"` (always Chrome). See [Chrome Rendering Fallback](#chrome-rendering-fallback). |
 | `output_format` | enum | No | `"text"` (default) for comparison table, `"json"` for raw results to pass to `export_bulk_report`. |
@@ -428,7 +431,7 @@ Generate a styled report for bulk analysis.
 | Parameter | Type | Required | Description |
 |-----------|------|----------|-------------|
 | `format` | enum | Yes | Report format: `"markdown"` or `"html"` |
-| `domains` | array[string] | No* | Compare 2-10 domains. Do not include `https://`. |
+| `domains` | array[string] | No* | Compare 2-50 domains. Do not include `https://`. For more than 50, run multiple times. |
 | `urls` | array[string] | No* | Analyse 1-50,000 specific URLs. Include `https://`. |
 | `sitemap_url` | string | No* | Crawl a sitemap URL. |
 | `analysis_results` | object | No* | Pre-computed results from `compare_domains`, `analyze_urls`, or `analyze_sitemap` (with `output_format="json"`). |
@@ -470,11 +473,11 @@ The analysis evaluates 16 categories, each with a weight reflecting its importan
 | 3 | **Accessibility for Agents** | 1.0x | Lang attribute, alt text on images, ARIA labels, descriptive link text |
 | 4 | **Internal Linking** | 1.0x | Link density, navigation structure, breadcrumb markup |
 | 5 | **Meta & Discoverability** | 1.0x | Title, meta description, canonical URL, Open Graph tags, hreflang |
-| 6 | **Machine Readability** | 1.5x | SSR detection, bot blocking checks, robots.txt rules, llms.txt presence* |
+| 6 | **Machine Readability** | 1.5x | SSR detection, bot blocking checks, robots.txt rules, llms.txt presence*, robots.txt Content-Signal directive, llms-full.txt, HTTP Link discovery headers, Markdown source endpoints, RSS/Atom/JSON feed discovery |
 | 7 | **Entity & Authority** | 1.0x | Author info, publication dates, organization schema, E-E-A-T signals, credentials, editorial policy, contact completeness |
 | 8 | **Citability & Answer-Readiness** | 1.3x | FAQ content, data tables, lists, lead paragraph quality |
 | 9 | **Performance & Crawlability** | 0.3x | Image dimensions, lazy loading, resource hints |
-| 10 | **Agent Interactivity** | 0.2x | WebMCP tools, form annotations, agent-callable actions |
+| 10 | **Agent Interactivity** | 0.2x | WebMCP tools, form annotations, agent-callable actions, MCP server card (`/.well-known/mcp/server-card.json`), A2A agent card, Agent-Skills index, NLWeb endpoint, schemamap |
 | 11 | **Content Positioning** | 1.2x | Brand differentiation, proof points, social proof |
 | 12 | **Content Freshness** | 0.8x | Date signals, content age, temporal language |
 | 13 | **Information Density** | 1.0x | Substantive-to-filler ratio, section depth, claim-evidence pairing |
@@ -492,6 +495,29 @@ The analysis evaluates 16 categories, each with a weight reflecting its importan
 ---
+## Agent-Readiness Discovery
+Alongside the established checks, the server probes a set of **emerging agent-readiness standards** (largely from [specification.website](https://specification.website)). These surfaces let agents discover and consume a site without scraping HTML.
+These checks are **bonus-scored**: a site gets credit when a surface is present, but absence is reported as informational guidance rather than a penalty. This keeps the long tail of sites that have not adopted these new standards from being unfairly marked down, while still rewarding early adopters.
+| Surface | Where it's checked | What it signals |
+|---------|--------------------|-----------------|
+| **Content-Signal** | robots.txt directive | Machine-readable AI usage preferences (`search` / `ai-input` / `ai-train`). Only `ai-input=no` affects AI answer visibility; `ai-train=no` is treated as a training-only preference with no citation impact. |
+| **llms-full.txt** | `/llms-full.txt` | Concatenated Markdown corpus of the pages listed in llms.txt, for full-context ingestion. Very large files (>5 MB) are flagged. |
+| **HTTP Link discovery** | response `Link` header | Resource discovery via headers (`rel="describedby"`, `api-catalog`, `sitemap`, `mcp`, `service-desc`, `nlweb`) without parsing HTML. |
+| **Markdown source endpoint** | `<link rel="alternate" type="text/markdown">` or content negotiation | A clean `.md` version of each page for agent ingestion. |
+| **Feed discovery** | `<link rel="alternate">` | RSS / Atom / JSON feeds as a machine-readable content stream. |
+| **MCP server card** | `/.well-known/mcp/server-card.json` | Discoverable MCP server (name, version, transport, endpoint, tools). |
+| **A2A agent card** | `/.well-known/agent-card.json` | Agent-to-agent discovery with declared skills. |
+| **Agent-Skills index** | `/.well-known/agent-skills/index.json` | Reusable agent skills exposed with digests. |
+| **NLWeb endpoint** | `<link rel="nlweb">` or `Link` header | Natural-language query endpoint (conventionally `/ask`). |
+| **Schemamap** | `/schemamap.xml` or `<link rel="schemamap">` | Per-resource JSON-LD (`.jsonld`) endpoints for agent-friendly structured data. |
+Content-Signal, HTTP Link discovery, Markdown source endpoints, llms-full.txt, and feed discovery contribute to the **Machine Readability** category; the MCP/A2A/Agent-Skills cards, NLWeb, and schemamap contribute to **Agent Interactivity**. The raw findings are also returned under an `agentReadiness` object in `output_format="json"` results.
+---
 ## Rate Limiting
 To prevent overwhelming target servers during batch operations, the MCP server enforces per-domain rate limiting:
@@ -689,6 +715,7 @@ research-mcp/
    - Homepage HTML (static fetch first, Chrome fallback if bot-blocked)
    - sitemap.xml
    - UCP profile (/.well-known/ucp)
+   - Agent-readiness discovery surfaces: /llms-full.txt, /.well-known/mcp/server-card.json, /.well-known/agent-card.json, /.well-known/agent-skills/index.json, /schemamap.xml
 2. **Parse HTML with cheerio** (server-side DOM)
@@ -734,7 +761,7 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":
 **Cause:** Invalid or expired license key.
-**Fix:** Get a valid key at [glippy.dev](https://glippy.dev).
+**Fix:** Get a valid key at [glippy.dev](https://www.glippy.dev).
 ### "Could not reach license server"
@@ -795,16 +822,16 @@ The server checks access rules for these AI crawlers in robots.txt:
 ## License
-See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://glippy.dev).
+See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://www.glippy.dev).
 ---
 ## Support
 - **Integration Guide:** [docs/INTEGRATIONS.md](docs/INTEGRATIONS.md)
-- **Online Documentation:** [glippy.dev/docs](https://glippy.dev)
+- **Online Documentation:** [glippy.dev/docs](https://www.glippy.dev)
 - **Issues:** [github.com/jbobbink/glippy/issues](https://github.com/jbobbink/glippy/issues)
-- **Homepage:** [glippy.dev](https://glippy.dev)
+- **Homepage:** [glippy.dev](https://www.glippy.dev)
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "glippy-mcp",
-  "version": "0.3.2",
+  "version": "0.4.0",
   "description": "MCP server for GEO (Generative Engine Optimization) analysis — check any domain's AI-readiness",
   "main": "src/index.js",
   "type": "module",
@@ -26,7 +26,7 @@
     "claude",
     "glippy"
   ],
-  "homepage": "https://glippy.dev",
+  "homepage": "https://www.glippy.dev",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/jbobbink/glippy.git"

package/src/geo-checker.js CHANGED Viewed

@@ -430,18 +430,29 @@ function analyseRobotsTxt(text) {
     blocksCrawlers: {},
     hasWildcardDisallow: false,
     sitemapUrls: [],
+    // Content-Signal directive (specification.website / Cloudflare content
+    // signals). Parsed into a lowercased key -> value map (e.g. search, ai-input, ai-train -> 'yes'|'no'); stays null when no directive is present.
+    contentSignals: null,
   };
   if (!text) return result;
   const lines = text.split(/\r?\n/);
-  // Collect sitemap references.
+  // Collect sitemap references and Content-Signal directives.
   for (const line of lines) {
     const sitemapMatch = line.match(/^\s*Sitemap\s*:\s*(.+)/i);
     if (sitemapMatch) {
       result.sitemapUrls.push(sitemapMatch[1].trim());
     }
+    const signalMatch = line.replace(/#.*$/, '').match(/^\s*Content-Signal\s*:\s*(.+)/i);
+    if (signalMatch) {
+      if (!result.contentSignals) result.contentSignals = {};
+      for (const pair of signalMatch[1].split(',')) {
+        const [k, v] = pair.split('=').map((s) => (s || '').trim().toLowerCase());
+        if (k && v) result.contentSignals[k] = v;
+      }
+    }
   }
   // Build a minimal per-user-agent rule map.
@@ -1922,11 +1933,21 @@ function checkMeta($, currentUrl) {
 // CHECK CATEGORY 6: Machine Readability
 // ---------------------------------------------------------------------------
-function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders) {
+function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders, agentReadiness = null) {
   const checks = [];
   let score = 0;
   let maxScore = 0;
+  // Case-insensitive response header lookup (responseHeaders may use any casing).
+  const getHeader = (name) => {
+    if (!responseHeaders) return '';
+    const lower = name.toLowerCase();
+    for (const k of Object.keys(responseHeaders)) {
+      if (k.toLowerCase() === lower) return String(responseHeaders[k] || '');
+    }
+    return '';
+  };
   // Content in initial HTML (SSR check)
   const mainEl = $('main, [role="main"], article');
   const textContent = (mainEl.length > 0 ? mainEl.first().text() : $('body').text() || '').trim();
@@ -2058,7 +2079,7 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
   // X-Robots-Tag header
   if (responseHeaders) {
-    const xRobotsTag = responseHeaders['x-robots-tag'] || '';
+    const xRobotsTag = getHeader('x-robots-tag');
     if (xRobotsTag) {
       if (xRobotsTag.includes('noindex')) {
         checks.push({ status: 'fail', label: 'X-Robots-Tag header: NOINDEX', detail: `"${xRobotsTag}" - page won't be indexed via header!` });
@@ -2070,6 +2091,92 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
     }
   }
+  // ── Agent-readiness discovery (emerging standards) ──
+  // Bonus scoring only: no penalty for absence, so the long tail of sites that haven't adopted them isn't punished.
+  // Content-Signal directive in robots.txt (search / ai-input / ai-train).
+  if (robotsTxtData && robotsTxtData.contentSignals) {
+    const cs = robotsTxtData.contentSignals;
+    maxScore += 4;
+    score += 4;
+    const summary = Object.entries(cs).map(([k, v]) => `${k}=${v}`).join(', ');
+    // Only ai-input=no hurts AI citation/answer visibility; ai-train=no is a
+    // training-only preference (no citation impact), consistent with how Glippy
+    // treats training vs citation crawlers elsewhere.
+    if (cs['ai-input'] === 'no') {
+      checks.push({ status: 'info', label: 'robots.txt Content-Signal present (restrictive)', detail: `Declares AI usage preferences: ${summary}. ai-input=no asks AI engines not to use the page for live answers.`, found: [summary] });
+    } else {
+      checks.push({ status: 'pass', label: 'robots.txt Content-Signal present', detail: `Machine-readable AI usage preferences: ${summary}`, found: [summary] });
+    }
+  } else if (robotsTxtData && robotsTxtData.exists) {
+    checks.push({ status: 'info', label: 'No Content-Signal directive in robots.txt', detail: 'Add a Content-Signal line (e.g. "Content-Signal: search=yes, ai-input=yes, ai-train=no") to declare AI usage preferences (specification.website)' });
+  }
+  // HTTP Link header discovery (rel=describedby / api-catalog / sitemap / mcp / service-desc / nlweb).
+  {
+    const linkHeader = getHeader('link');
+    const agentRels = ['describedby', 'api-catalog', 'sitemap', 'mcp', 'service-desc', 'nlweb'];
+    const foundRels = agentRels.filter((rel) => new RegExp(`rel\\s*=\\s*"?${rel}\\b`, 'i').test(linkHeader));
+    if (foundRels.length > 0) {
+      maxScore += 4;
+      score += 4;
+      checks.push({ status: 'pass', label: `HTTP Link header discovery: ${foundRels.join(', ')}`, detail: 'Agents can discover resources from response headers without parsing HTML', found: foundRels });
+    } else {
+      checks.push({ status: 'info', label: 'No agent-discovery HTTP Link headers', detail: 'Expose discovery via Link headers, e.g. Link: </llms.txt>; rel="describedby"; type="text/markdown" (specification.website)' });
+    }
+  }
+  // Per-page Markdown source endpoint (link rel=alternate type=text/markdown, or content negotiation).
+  {
+    const mdLink = $('link[rel="alternate"][type="text/markdown"]').attr('href');
+    const vary = getHeader('vary').toLowerCase();
+    const contentLocation = getHeader('content-location');
+    const negotiated = vary.includes('accept') && /\.md(\?|$)/i.test(contentLocation);
+    if (mdLink || negotiated) {
+      maxScore += 4;
+      score += 4;
+      checks.push({ status: 'pass', label: 'Markdown source endpoint advertised', detail: mdLink ? `<link rel="alternate" type="text/markdown" href="${mdLink}">` : 'Served via content negotiation (Vary: Accept + Content-Location .md)', found: mdLink ? [mdLink] : undefined });
+    } else {
+      checks.push({ status: 'info', label: 'No Markdown source endpoint', detail: 'Serve a .md version of each page and advertise it with <link rel="alternate" type="text/markdown"> for clean agent ingestion (specification.website)' });
+    }
+  }
+  // llms-full.txt (concatenated markdown of the llms.txt pages).
+  if (agentReadiness && agentReadiness.llmsFullTxt) {
+    const lf = agentReadiness.llmsFullTxt;
+    if (lf.exists && !lf.isHtml) {
+      maxScore += 4;
+      const tooBig = lf.sizeBytes > 5 * 1024 * 1024;
+      if (tooBig) {
+        score += 2;
+        checks.push({ status: 'warn', label: 'llms-full.txt found but very large', detail: `${Math.round(lf.sizeBytes / 1024)} KB - over a couple of MB is suspect and may exceed agent context windows` });
+      } else {
+        score += 4;
+        checks.push({ status: 'pass', label: 'llms-full.txt found', detail: `Concatenated markdown corpus for LLMs (${Math.round(lf.sizeBytes / 1024)} KB)` });
+      }
+    } else if (lf.exists && lf.isHtml) {
+      maxScore += 4;
+      checks.push({ status: 'warn', label: 'llms-full.txt served as HTML', detail: 'Serve llms-full.txt as text/markdown or text/plain, not HTML' });
+    } else {
+      checks.push({ status: 'info', label: 'No llms-full.txt found', detail: 'Add /llms-full.txt with the concatenated markdown of pages in llms.txt for full-context AI ingestion (specification.website)' });
+    }
+  }
+  // Feed discovery (RSS / Atom / JSON Feed) - machine-readable formats.
+  {
+    const feedSelectors = 'link[rel="alternate"][type="application/rss+xml"], link[rel="alternate"][type="application/atom+xml"], link[rel="alternate"][type="application/feed+json"], link[rel="alternate"][type="application/json"]';
+    const feeds = $(feedSelectors);
+    if (feeds.length > 0) {
+      maxScore += 3;
+      score += 3;
+      const hrefs = [];
+      feeds.each((_, el) => { const h = $(el).attr('href'); if (h) hrefs.push(h); });
+      checks.push({ status: 'pass', label: `Feed discovery: ${feeds.length} feed(s)`, detail: 'RSS/Atom/JSON feeds give agents a machine-readable content stream', found: hrefs.slice(0, 5) });
+    } else {
+      checks.push({ status: 'info', label: 'No discoverable feed', detail: 'Advertise an RSS/Atom/JSON feed via <link rel="alternate"> for machine-readable content updates (specification.website)' });
+    }
+  }
   return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Machine Readability' };
 }
@@ -3217,11 +3324,21 @@ function checkPerformance($) {
 // (signing_keys, order webhook_url, etc. become required at this version).
 const LATEST_UCP_VERSION = '2026-04-08';
-function checkWebMCP($, pageType, ucpData) {
+function checkWebMCP($, pageType, ucpData, responseHeaders = null, agentReadiness = null) {
   const checks = [];
   let score = 0;
   let maxScore = 0;
+  // Case-insensitive response header lookup.
+  const getHeaderWebMCP = (name) => {
+    if (!responseHeaders) return '';
+    const lower = name.toLowerCase();
+    for (const k of Object.keys(responseHeaders)) {
+      if (k.toLowerCase() === lower) return String(responseHeaders[k] || '');
+    }
+    return '';
+  };
   // ── CHECK 1: Declarative WebMCP Tool Detection (DOM-based) ──
   const webmcpForms = $('form[toolname]');
   const toolCount = webmcpForms.length;
@@ -3869,9 +3986,92 @@ function checkWebMCP($, pageType, ucpData) {
     checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
   }
+  // ══════════════════════════════════════════════════════
+  // AGENT DISCOVERY SURFACES (specification.website Agent Readiness)
+  // Emerging standards: bonus scoring (credit when present, info when absent).
+  // ══════════════════════════════════════════════════════
+  let hasDiscoverySurface = false;
+  const ar = agentReadiness || {};
+  const linkHeaderWebMCP = getHeaderWebMCP('link');
+  // MCP server discovery: /.well-known/mcp/server-card.json (+ Link rel="mcp").
+  {
+    const card = ar.mcpServerCard;
+    const linkAdvertised = /rel\s*=\s*"?mcp\b/i.test(linkHeaderWebMCP);
+    if (card && card.exists && card.valid) {
+      hasDiscoverySurface = true;
+      maxScore += 5; score += 5;
+      checks.push({ status: 'pass', label: 'MCP server card found', detail: `/.well-known/mcp/server-card.json is published${linkAdvertised ? ' and advertised via Link header' : ''}` });
+    } else if (card && card.exists) {
+      maxScore += 5; score += 2;
+      checks.push({ status: 'warn', label: 'MCP server card invalid JSON', detail: '/.well-known/mcp/server-card.json was reachable but did not parse as JSON' });
+    } else {
+      checks.push({ status: 'info', label: 'No MCP server card', detail: 'Publish /.well-known/mcp/server-card.json (name, version, transport, endpoint, tools) so agents can discover your MCP server (specification.website)' });
+    }
+  }
+  // A2A agent card: /.well-known/agent-card.json.
+  {
+    const card = ar.a2aAgentCard;
+    if (card && card.exists && card.valid) {
+      hasDiscoverySurface = true;
+      maxScore += 4; score += 4;
+      checks.push({ status: 'pass', label: 'A2A agent card found', detail: `/.well-known/agent-card.json is valid${card.skillsCount ? ` with ${card.skillsCount} skill(s)` : ''}` });
+    } else if (card && card.exists) {
+      maxScore += 4; score += 2;
+      checks.push({ status: 'warn', label: 'A2A agent card incomplete', detail: 'agent-card.json is missing required fields (name, description, version) or skills' });
+    } else {
+      checks.push({ status: 'info', label: 'No A2A agent card', detail: 'Publish /.well-known/agent-card.json to let other agents discover and call your services (specification.website)' });
+    }
+  }
+  // Agent Skills discovery: /.well-known/agent-skills/index.json.
+  {
+    const sk = ar.agentSkills;
+    if (sk && sk.exists && sk.valid && sk.schemaOk) {
+      hasDiscoverySurface = true;
+      maxScore += 4; score += 4;
+      checks.push({ status: 'pass', label: 'Agent Skills index found', detail: `/.well-known/agent-skills/index.json published with ${sk.skillsCount} skill(s)` });
+    } else if (sk && sk.exists) {
+      maxScore += 4; score += 2;
+      checks.push({ status: 'warn', label: 'Agent Skills index incomplete', detail: 'index.json should set $schema to the agentskills discovery schema and list skills with digests' });
+    } else {
+      checks.push({ status: 'info', label: 'No Agent Skills discovery', detail: 'Publish /.well-known/agent-skills/index.json to expose reusable agent skills (specification.website)' });
+    }
+  }
+  // NLWeb conversational endpoint (link rel="nlweb" or Link header).
+  {
+    const nlwebLink = $('link[rel="nlweb"]').attr('href');
+    const nlwebHeader = /rel\s*=\s*"?nlweb\b/i.test(linkHeaderWebMCP);
+    if (nlwebLink || nlwebHeader) {
+      hasDiscoverySurface = true;
+      maxScore += 3; score += 3;
+      checks.push({ status: 'pass', label: 'NLWeb endpoint advertised', detail: nlwebLink ? `<link rel="nlweb" href="${nlwebLink}">` : 'Advertised via Link: rel="nlweb"' });
+    } else {
+      checks.push({ status: 'info', label: 'No NLWeb endpoint', detail: 'Expose a natural-language query endpoint (by convention /ask) and advertise it with <link rel="nlweb"> (specification.website)' });
+    }
+  }
+  // Schemamap: /schemamap.xml + per-resource JSON-LD endpoints (link rel="schemamap").
+  {
+    const schemamapLink = $('link[rel="schemamap"]').attr('href');
+    const sm = ar.schemamap;
+    if ((sm && sm.exists && sm.valid) || schemamapLink) {
+      hasDiscoverySurface = true;
+      maxScore += 3; score += 3;
+      const detail = sm && sm.exists
+        ? `/schemamap.xml published${sm.resourceCount ? ` with ${sm.resourceCount} resource(s)` : ''}`
+        : `Advertised via <link rel="schemamap" href="${schemamapLink}">`;
+      checks.push({ status: 'pass', label: 'Schemamap found', detail });
+    } else {
+      checks.push({ status: 'info', label: 'No schemamap', detail: 'Publish /schemamap.xml listing per-resource JSON-LD (.jsonld) endpoints for agent-friendly structured data (specification.website)' });
+    }
+  }
   // Baseline credit for purely informational pages.
-  // If the page has no forms, no WebMCP signals, no UCP profile, and no Shopify
-  // surface, there's nothing for it to expose to agents - WebMCP/UCP are N/A here.
+  // If the page has no forms, no WebMCP signals, no UCP profile, no discovery
+  // surface, and no Shopify surface, there's nothing for it to expose to agents.
   // Without this, content-only pages are capped well below 100 even when there's
   // nothing to fix, dragging the overall score unfairly.
   const totalForms = $('form').length;
@@ -3884,7 +4084,8 @@ function checkWebMCP($, pageType, ucpData) {
     !webmcpSDKFound &&
     !hasSchemaActions &&
     !hasUcp &&
-    !hasShopify;
+    !hasShopify &&
+    !hasDiscoverySurface;
   if (hasNoInteractiveSurface) {
     checks.push({
@@ -4993,7 +5194,7 @@ function checkMultimodal($, jsonLdData) {
  *   hasStructuredData: boolean
  * }}
  */
-function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders, pathname = '/', ucpData = null) {
+function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders, pathname = '/', ucpData = null, agentReadiness = null) {
   const result = {
     pageType: 'generic',
     categories: [],
@@ -5085,11 +5286,11 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
     checkAccessibility($),
     checkInternalLinking($, domain),
     checkMeta($, currentUrl),
-    checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders),
+    checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders, agentReadiness),
     checkEntity($, jsonLdData),
     checkCitability($),
     checkPerformance($),
-    checkWebMCP($, pageType, ucpData),
+    checkWebMCP($, pageType, ucpData, responseHeaders, agentReadiness),
     checkContentPositioning($),
     checkContentFreshness($, jsonLdData),
     checkInformationDensity($),
@@ -5222,6 +5423,14 @@ async function checkGEO(domain, options = {}) {
       content: null,
       error: null,
     },
+    // Agent-readiness discovery surfaces (specification.website Agent Readiness).
+    agentReadiness: {
+      llmsFullTxt: { exists: false, url: null, sizeBytes: 0, isHtml: false },
+      mcpServerCard: { exists: false, url: null, valid: false },
+      a2aAgentCard: { exists: false, url: null, valid: false, skillsCount: 0 },
+      agentSkills: { exists: false, url: null, valid: false, schemaOk: false, skillsCount: 0 },
+      schemamap: { exists: false, url: null, valid: false, resourceCount: 0 },
+    },
     securityHeaders: {},
     // Multi-page crawl results
     multiPageCrawl: {
@@ -5266,6 +5475,12 @@ async function checkGEO(domain, options = {}) {
   const homepageUrl = `${baseUrl}/`;
   const sitemapUrl = `${baseUrl}/sitemap.xml`;
   const ucpUrl = `${baseUrl}/.well-known/ucp`;
+  // Agent-readiness discovery resources (specification.website / Agent Readiness).
+  const llmsFullUrl = `${baseUrl}/llms-full.txt`;
+  const mcpCardUrl = `${baseUrl}/.well-known/mcp/server-card.json`;
+  const agentCardUrl = `${baseUrl}/.well-known/agent-card.json`;
+  const agentSkillsUrl = `${baseUrl}/.well-known/agent-skills/index.json`;
+  const schemamapUrl = `${baseUrl}/schemamap.xml`;
   output.robotsTxt.url = robotsUrl;
   output.llmsTxt.url = llmsUrl;
@@ -5274,9 +5489,11 @@ async function checkGEO(domain, options = {}) {
   output.ucpProfile.url = ucpUrl;
   let robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes;
+  let llmsFullRes, mcpCardRes, agentCardRes, agentSkillsRes, schemamapRes;
   try {
-    [robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes] = await Promise.all([
+    [robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes,
+      llmsFullRes, mcpCardRes, agentCardRes, agentSkillsRes, schemamapRes] = await Promise.all([
       throttledFetchUrl(robotsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
       throttledFetchUrl(llmsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
       renderMode === 'chrome'
@@ -5284,6 +5501,11 @@ async function checkGEO(domain, options = {}) {
         : throttledFetchUrl(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} })),
       throttledFetchUrl(sitemapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
       throttledFetchUrl(ucpUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
+      throttledFetchUrl(llmsFullUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
+      throttledFetchUrl(mcpCardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
+      throttledFetchUrl(agentCardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
+      throttledFetchUrl(agentSkillsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
+      throttledFetchUrl(schemamapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
     ]);
   } catch (err) {
     output.error = `Failed to fetch resources: ${err.message}`;
@@ -5324,6 +5546,7 @@ async function checkGEO(domain, options = {}) {
       output.robotsTxt.blocksCrawlers = analysis.blocksCrawlers;
       output.robotsTxt.hasWildcardDisallow = analysis.hasWildcardDisallow;
       output.robotsTxt.sitemapReferences = analysis.sitemapUrls;
+      output.robotsTxt.contentSignals = analysis.contentSignals;
     }
   } catch (err) {
     output.robotsTxt.error = err.message;
@@ -5339,6 +5562,66 @@ async function checkGEO(domain, options = {}) {
     output.llmsTxt.error = err.message;
   }
+  // --- Agent-readiness discovery surfaces ---
+  try {
+    const ar = output.agentReadiness;
+    // /llms-full.txt: concatenated markdown of the pages in llms.txt.
+    ar.llmsFullTxt.url = llmsFullUrl;
+    if (llmsFullRes.statusCode === 200 && llmsFullRes.body) {
+      const body = llmsFullRes.body;
+      const trimmed = body.trimStart().toLowerCase();
+      ar.llmsFullTxt.exists = true;
+      ar.llmsFullTxt.sizeBytes = Buffer.byteLength(body);
+      ar.llmsFullTxt.isHtml = trimmed.startsWith('<!') || trimmed.startsWith('<html');
+    }
+    // /.well-known/mcp/server-card.json: MCP server discovery.
+    ar.mcpServerCard.url = mcpCardUrl;
+    if (mcpCardRes.statusCode === 200 && mcpCardRes.body) {
+      ar.mcpServerCard.exists = true;
+      try { JSON.parse(mcpCardRes.body); ar.mcpServerCard.valid = true; } catch { /* invalid json */ }
+    }
+    // /.well-known/agent-card.json: A2A agent card.
+    ar.a2aAgentCard.url = agentCardUrl;
+    if (agentCardRes.statusCode === 200 && agentCardRes.body) {
+      ar.a2aAgentCard.exists = true;
+      try {
+        const card = JSON.parse(agentCardRes.body);
+        const requiredOk = !!(card && card.name && card.description && (card.version || card.protocolVersion));
+        ar.a2aAgentCard.valid = requiredOk;
+        ar.a2aAgentCard.skillsCount = Array.isArray(card && card.skills) ? card.skills.length : 0;
+      } catch { /* invalid json */ }
+    }
+    // /.well-known/agent-skills/index.json: Agent Skills discovery.
+    ar.agentSkills.url = agentSkillsUrl;
+    if (agentSkillsRes.statusCode === 200 && agentSkillsRes.body) {
+      ar.agentSkills.exists = true;
+      try {
+        const idx = JSON.parse(agentSkillsRes.body);
+        ar.agentSkills.valid = true;
+        ar.agentSkills.schemaOk = typeof idx.$schema === 'string' && idx.$schema.includes('agentskills');
+        ar.agentSkills.skillsCount = Array.isArray(idx.skills) ? idx.skills.length : 0;
+      } catch { /* invalid json */ }
+    }
+    // /schemamap.xml: discoverable JSON-LD endpoints per resource.
+    ar.schemamap.url = schemamapUrl;
+    if (schemamapRes.statusCode === 200 && schemamapRes.body) {
+      ar.schemamap.exists = true;
+      const body = schemamapRes.body;
+      if (body.includes('<schemamap') || body.includes('<resource')) {
+        ar.schemamap.valid = true;
+        const matches = body.match(/<resource[\s>]/g);
+        ar.schemamap.resourceCount = matches ? matches.length : 0;
+      }
+    }
+  } catch (err) {
+    // Non-critical: leave defaults.
+  }
   // --- /.well-known/ucp ---
   try {
     if (ucpRes.statusCode === 200 && ucpRes.body) {
@@ -5410,6 +5693,7 @@ async function checkGEO(domain, options = {}) {
         homepageRes.headers || {},
         '/',
         output.ucpProfile,
+        output.agentReadiness,
       );
     } else {
       output.homepage.error =
@@ -5511,6 +5795,7 @@ async function checkGEO(domain, options = {}) {
                 res.headers || {},
                 pathname,
                 output.ucpProfile,
+                output.agentReadiness,
               );
               return { url: pageUrl, analysis, error: null };
             }

package/src/index.js CHANGED Viewed

@@ -1609,7 +1609,7 @@ function bulkHTMLScript() {
 const server = new McpServer({
   name: "glippy-geo",
-  version: "0.1.0",
+  version: "0.4.0",
 });
 // ---------------------------------------------------------------------------
@@ -2137,15 +2137,16 @@ server.tool(
   "Analyse multiple domains in parallel and compare their GEO scores side by side. " +
     "Returns a comparison table with overall scores, per-category breakdowns, and a ranked summary. " +
     "Useful for competitive analysis or auditing a portfolio of sites. " +
+    "Accepts up to 50 domains per call - for larger lists, split them across multiple runs and merge the results. " +
     "Requires Pro or Agency tier. " +
     "Use output_format='json' to get raw results that can be passed to export_bulk_report.",
   {
     domains: z
       .array(z.string())
       .min(2)
-      .max(10)
+      .max(50, "compare_domains accepts at most 50 domains per call. Split larger lists across multiple runs and merge the results.")
       .describe(
-        'List of domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix.'
+        'List of 2-50 domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix. For more than 50 domains, run multiple times and combine the output.'
       ),
     max_pages: z
       .number()
@@ -2178,15 +2179,22 @@ server.tool(
     const maxPages = max_pages ?? 10;
     const renderMode = render_mode ?? "static";
-    // Run all analyses in parallel
-    const results = await Promise.allSettled(
-      domains.map((domain) =>
-        checkGEO(domain, { maxPages, renderMode }).then((result) => ({
-          domain,
-          result,
-        }))
-      )
-    );
+    // Cap concurrent domain analyses so a 50-domain × 10-page run does not
+    // fan out into 500 simultaneous fetches.
+    const DOMAIN_CONCURRENCY = 10;
+    const results = [];
+    for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
+      const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
+      const batchResults = await Promise.allSettled(
+        batch.map((domain) =>
+          checkGEO(domain, { maxPages, renderMode }).then((result) => ({
+            domain,
+            result,
+          }))
+        )
+      );
+      results.push(...batchResults);
+    }
     // JSON output mode - return raw results for use with export_bulk_report
     if (output_format === "json") {
@@ -3085,10 +3093,10 @@ server.tool(
     domains: z
       .array(z.string())
       .min(2)
-      .max(10)
+      .max(50, "export_bulk_report accepts at most 50 domains per call. Split larger lists across multiple runs.")
       .optional()
       .describe(
-        'Compare multiple domains. E.g. ["example.com", "competitor.com"]. Do not include https://.'
+        'Compare 2-50 domains. E.g. ["example.com", "competitor.com"]. Do not include https://. For more than 50, run multiple times.'
       ),
     urls: z
       .array(z.string())
@@ -3258,14 +3266,20 @@ server.tool(
       // ------------------------------------------------------------------
       if (domains) {
         const maxPages = max_pages ?? 10;
-        const results = await Promise.allSettled(
-          domains.map((domain) =>
-            checkGEO(domain, { maxPages, renderMode }).then((result) => ({
-              domain,
-              result,
-            }))
-          )
-        );
+        const DOMAIN_CONCURRENCY = 10;
+        const results = [];
+        for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
+          const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
+          const batchResults = await Promise.allSettled(
+            batch.map((domain) =>
+              checkGEO(domain, { maxPages, renderMode }).then((result) => ({
+                domain,
+                result,
+              }))
+            )
+          );
+          results.push(...batchResults);
+        }
         const entries = [];
         for (const r of results) {