glippy-mcp 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +36 -9
- package/package.json +2 -2
- package/src/geo-checker.js +296 -11
- package/src/index.js +36 -22
package/LICENSE
CHANGED
|
@@ -14,7 +14,7 @@ you a non-exclusive, non-transferable, revocable license to install and
|
|
|
14
14
|
use the Software solely for your own internal business or personal use.
|
|
15
15
|
|
|
16
16
|
A valid, paid license key (format: GLMCP-XXXX-XXXX-XXXX) is required to
|
|
17
|
-
use the Software. License keys may be purchased at https://glippy.dev.
|
|
17
|
+
use the Software. License keys may be purchased at https://www.glippy.dev.
|
|
18
18
|
Running the Software without a valid license key, or in a manner that
|
|
19
19
|
circumvents license verification, is not permitted.
|
|
20
20
|
|
|
@@ -61,4 +61,4 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
61
61
|
This Agreement is governed by the laws of the Netherlands, without regard
|
|
62
62
|
to its conflict of laws principles.
|
|
63
63
|
|
|
64
|
-
For license purchases or enquiries: https://glippy.dev
|
|
64
|
+
For license purchases or enquiries: https://www.glippy.dev
|
package/README.md
CHANGED
|
@@ -12,6 +12,7 @@ It wraps the Glippy desktop app's server-side analysis engine (`geo-checker.js`)
|
|
|
12
12
|
- Full 16-category GEO analysis with weighted scoring
|
|
13
13
|
- robots.txt AI crawler access detection
|
|
14
14
|
- llms.txt file discovery and parsing
|
|
15
|
+
- **Agent-readiness discovery** - detects emerging agent standards (Content-Signal, llms-full.txt, MCP/A2A/Agent-Skills cards, schemamap, NLWeb, feed discovery)
|
|
15
16
|
- Sitemap crawling and multi-page analysis
|
|
16
17
|
- Domain comparison and competitive analysis
|
|
17
18
|
- Export to styled Markdown or HTML reports
|
|
@@ -40,6 +41,7 @@ It wraps the Glippy desktop app's server-side analysis engine (`geo-checker.js`)
|
|
|
40
41
|
- [export_report](#export_report)
|
|
41
42
|
- [export_bulk_report](#export_bulk_report)
|
|
42
43
|
- [GEO Scoring Categories](#geo-scoring-categories)
|
|
44
|
+
- [Agent-Readiness Discovery](#agent-readiness-discovery)
|
|
43
45
|
- [Rate Limiting](#rate-limiting)
|
|
44
46
|
- [Output Formats](#output-formats)
|
|
45
47
|
- [Chrome Rendering Fallback](#chrome-rendering-fallback)
|
|
@@ -78,7 +80,7 @@ npx -y glippy-mcp
|
|
|
78
80
|
|
|
79
81
|
### License Key
|
|
80
82
|
|
|
81
|
-
A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://glippy.dev).
|
|
83
|
+
A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://www.glippy.dev).
|
|
82
84
|
|
|
83
85
|
The server validates the key against the Glippy API on first use and caches the result for 24 hours. **Analysis runs locally on your machine** — only the license check calls the server.
|
|
84
86
|
|
|
@@ -234,6 +236,7 @@ Check which AI crawlers are blocked on example.com
|
|
|
234
236
|
- AmazonBot
|
|
235
237
|
- cohere-ai
|
|
236
238
|
- Sitemap references found in robots.txt
|
|
239
|
+
- Content-Signal directive (`search` / `ai-input` / `ai-train` preferences), when present
|
|
237
240
|
|
|
238
241
|
---
|
|
239
242
|
|
|
@@ -300,7 +303,7 @@ Analyse multiple domains in parallel and compare scores.
|
|
|
300
303
|
|
|
301
304
|
| Parameter | Type | Required | Description |
|
|
302
305
|
|-----------|------|----------|-------------|
|
|
303
|
-
| `domains` | array[string] | Yes | List of 2-
|
|
306
|
+
| `domains` | array[string] | Yes | List of 2-50 domains to compare, e.g. `["example.com", "competitor.com"]`. Do not include `https://` prefix. For more than 50 domains, split across multiple runs and merge the results. |
|
|
304
307
|
| `max_pages` | integer | No | Maximum pages to crawl per domain (1-10). Default: `10`. |
|
|
305
308
|
| `render_mode` | enum | No | `"static"` (default), `"auto"` (static with Chrome fallback on bot-block), or `"chrome"` (always Chrome). See [Chrome Rendering Fallback](#chrome-rendering-fallback). |
|
|
306
309
|
| `output_format` | enum | No | `"text"` (default) for comparison table, `"json"` for raw results to pass to `export_bulk_report`. |
|
|
@@ -428,7 +431,7 @@ Generate a styled report for bulk analysis.
|
|
|
428
431
|
| Parameter | Type | Required | Description |
|
|
429
432
|
|-----------|------|----------|-------------|
|
|
430
433
|
| `format` | enum | Yes | Report format: `"markdown"` or `"html"` |
|
|
431
|
-
| `domains` | array[string] | No* | Compare 2-
|
|
434
|
+
| `domains` | array[string] | No* | Compare 2-50 domains. Do not include `https://`. For more than 50, run multiple times. |
|
|
432
435
|
| `urls` | array[string] | No* | Analyse 1-50,000 specific URLs. Include `https://`. |
|
|
433
436
|
| `sitemap_url` | string | No* | Crawl a sitemap URL. |
|
|
434
437
|
| `analysis_results` | object | No* | Pre-computed results from `compare_domains`, `analyze_urls`, or `analyze_sitemap` (with `output_format="json"`). |
|
|
@@ -470,11 +473,11 @@ The analysis evaluates 16 categories, each with a weight reflecting its importan
|
|
|
470
473
|
| 3 | **Accessibility for Agents** | 1.0x | Lang attribute, alt text on images, ARIA labels, descriptive link text |
|
|
471
474
|
| 4 | **Internal Linking** | 1.0x | Link density, navigation structure, breadcrumb markup |
|
|
472
475
|
| 5 | **Meta & Discoverability** | 1.0x | Title, meta description, canonical URL, Open Graph tags, hreflang |
|
|
473
|
-
| 6 | **Machine Readability** | 1.5x | SSR detection, bot blocking checks, robots.txt rules, llms.txt presence
|
|
476
|
+
| 6 | **Machine Readability** | 1.5x | SSR detection, bot blocking checks, robots.txt rules, llms.txt presence*, robots.txt Content-Signal directive, llms-full.txt, HTTP Link discovery headers, Markdown source endpoints, RSS/Atom/JSON feed discovery |
|
|
474
477
|
| 7 | **Entity & Authority** | 1.0x | Author info, publication dates, organization schema, E-E-A-T signals, credentials, editorial policy, contact completeness |
|
|
475
478
|
| 8 | **Citability & Answer-Readiness** | 1.3x | FAQ content, data tables, lists, lead paragraph quality |
|
|
476
479
|
| 9 | **Performance & Crawlability** | 0.3x | Image dimensions, lazy loading, resource hints |
|
|
477
|
-
| 10 | **Agent Interactivity** | 0.2x | WebMCP tools, form annotations, agent-callable actions |
|
|
480
|
+
| 10 | **Agent Interactivity** | 0.2x | WebMCP tools, form annotations, agent-callable actions, MCP server card (`/.well-known/mcp/server-card.json`), A2A agent card, Agent-Skills index, NLWeb endpoint, schemamap |
|
|
478
481
|
| 11 | **Content Positioning** | 1.2x | Brand differentiation, proof points, social proof |
|
|
479
482
|
| 12 | **Content Freshness** | 0.8x | Date signals, content age, temporal language |
|
|
480
483
|
| 13 | **Information Density** | 1.0x | Substantive-to-filler ratio, section depth, claim-evidence pairing |
|
|
@@ -492,6 +495,29 @@ The analysis evaluates 16 categories, each with a weight reflecting its importan
|
|
|
492
495
|
|
|
493
496
|
---
|
|
494
497
|
|
|
498
|
+
## Agent-Readiness Discovery
|
|
499
|
+
|
|
500
|
+
Alongside the established checks, the server probes a set of **emerging agent-readiness standards** (largely from [specification.website](https://specification.website)). These surfaces let agents discover and consume a site without scraping HTML.
|
|
501
|
+
|
|
502
|
+
These checks are **bonus-scored**: a site gets credit when a surface is present, but absence is reported as informational guidance rather than a penalty. This keeps the long tail of sites that have not adopted these new standards from being unfairly marked down, while still rewarding early adopters.
|
|
503
|
+
|
|
504
|
+
| Surface | Where it's checked | What it signals |
|
|
505
|
+
|---------|--------------------|-----------------|
|
|
506
|
+
| **Content-Signal** | robots.txt directive | Machine-readable AI usage preferences (`search` / `ai-input` / `ai-train`). Only `ai-input=no` affects AI answer visibility; `ai-train=no` is treated as a training-only preference with no citation impact. |
|
|
507
|
+
| **llms-full.txt** | `/llms-full.txt` | Concatenated Markdown corpus of the pages listed in llms.txt, for full-context ingestion. Very large files (>5 MB) are flagged. |
|
|
508
|
+
| **HTTP Link discovery** | response `Link` header | Resource discovery via headers (`rel="describedby"`, `api-catalog`, `sitemap`, `mcp`, `service-desc`, `nlweb`) without parsing HTML. |
|
|
509
|
+
| **Markdown source endpoint** | `<link rel="alternate" type="text/markdown">` or content negotiation | A clean `.md` version of each page for agent ingestion. |
|
|
510
|
+
| **Feed discovery** | `<link rel="alternate">` | RSS / Atom / JSON feeds as a machine-readable content stream. |
|
|
511
|
+
| **MCP server card** | `/.well-known/mcp/server-card.json` | Discoverable MCP server (name, version, transport, endpoint, tools). |
|
|
512
|
+
| **A2A agent card** | `/.well-known/agent-card.json` | Agent-to-agent discovery with declared skills. |
|
|
513
|
+
| **Agent-Skills index** | `/.well-known/agent-skills/index.json` | Reusable agent skills exposed with digests. |
|
|
514
|
+
| **NLWeb endpoint** | `<link rel="nlweb">` or `Link` header | Natural-language query endpoint (conventionally `/ask`). |
|
|
515
|
+
| **Schemamap** | `/schemamap.xml` or `<link rel="schemamap">` | Per-resource JSON-LD (`.jsonld`) endpoints for agent-friendly structured data. |
|
|
516
|
+
|
|
517
|
+
Content-Signal, HTTP Link discovery, Markdown source endpoints, llms-full.txt, and feed discovery contribute to the **Machine Readability** category; the MCP/A2A/Agent-Skills cards, NLWeb, and schemamap contribute to **Agent Interactivity**. The raw findings are also returned under an `agentReadiness` object in `output_format="json"` results.
|
|
518
|
+
|
|
519
|
+
---
|
|
520
|
+
|
|
495
521
|
## Rate Limiting
|
|
496
522
|
|
|
497
523
|
To prevent overwhelming target servers during batch operations, the MCP server enforces per-domain rate limiting:
|
|
@@ -689,6 +715,7 @@ research-mcp/
|
|
|
689
715
|
- Homepage HTML (static fetch first, Chrome fallback if bot-blocked)
|
|
690
716
|
- sitemap.xml
|
|
691
717
|
- UCP profile (/.well-known/ucp)
|
|
718
|
+
- Agent-readiness discovery surfaces: /llms-full.txt, /.well-known/mcp/server-card.json, /.well-known/agent-card.json, /.well-known/agent-skills/index.json, /schemamap.xml
|
|
692
719
|
|
|
693
720
|
2. **Parse HTML with cheerio** (server-side DOM)
|
|
694
721
|
|
|
@@ -734,7 +761,7 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":
|
|
|
734
761
|
|
|
735
762
|
**Cause:** Invalid or expired license key.
|
|
736
763
|
|
|
737
|
-
**Fix:** Get a valid key at [glippy.dev](https://glippy.dev).
|
|
764
|
+
**Fix:** Get a valid key at [glippy.dev](https://www.glippy.dev).
|
|
738
765
|
|
|
739
766
|
### "Could not reach license server"
|
|
740
767
|
|
|
@@ -795,16 +822,16 @@ The server checks access rules for these AI crawlers in robots.txt:
|
|
|
795
822
|
|
|
796
823
|
## License
|
|
797
824
|
|
|
798
|
-
See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://glippy.dev).
|
|
825
|
+
See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://www.glippy.dev).
|
|
799
826
|
|
|
800
827
|
---
|
|
801
828
|
|
|
802
829
|
## Support
|
|
803
830
|
|
|
804
831
|
- **Integration Guide:** [docs/INTEGRATIONS.md](docs/INTEGRATIONS.md)
|
|
805
|
-
- **Online Documentation:** [glippy.dev/docs](https://glippy.dev)
|
|
832
|
+
- **Online Documentation:** [glippy.dev/docs](https://www.glippy.dev)
|
|
806
833
|
- **Issues:** [github.com/jbobbink/glippy/issues](https://github.com/jbobbink/glippy/issues)
|
|
807
|
-
- **Homepage:** [glippy.dev](https://glippy.dev)
|
|
834
|
+
- **Homepage:** [glippy.dev](https://www.glippy.dev)
|
|
808
835
|
|
|
809
836
|
---
|
|
810
837
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "glippy-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "MCP server for GEO (Generative Engine Optimization) analysis — check any domain's AI-readiness",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"claude",
|
|
27
27
|
"glippy"
|
|
28
28
|
],
|
|
29
|
-
"homepage": "https://glippy.dev",
|
|
29
|
+
"homepage": "https://www.glippy.dev",
|
|
30
30
|
"repository": {
|
|
31
31
|
"type": "git",
|
|
32
32
|
"url": "git+https://github.com/jbobbink/glippy.git"
|
package/src/geo-checker.js
CHANGED
|
@@ -430,18 +430,29 @@ function analyseRobotsTxt(text) {
|
|
|
430
430
|
blocksCrawlers: {},
|
|
431
431
|
hasWildcardDisallow: false,
|
|
432
432
|
sitemapUrls: [],
|
|
433
|
+
// Content-Signal directive (specification.website / Cloudflare content
|
|
434
|
+
// signals). Parsed into lowercased key -> value pairs, e.g. { search, ai-input, ai-train } -> 'yes'|'no'; stays null when no directive is present.
|
|
435
|
+
contentSignals: null,
|
|
433
436
|
};
|
|
434
437
|
|
|
435
438
|
if (!text) return result;
|
|
436
439
|
|
|
437
440
|
const lines = text.split(/\r?\n/);
|
|
438
441
|
|
|
439
|
-
// Collect sitemap references.
|
|
442
|
+
// Collect sitemap references and Content-Signal directives.
|
|
440
443
|
for (const line of lines) {
|
|
441
444
|
const sitemapMatch = line.match(/^\s*Sitemap\s*:\s*(.+)/i);
|
|
442
445
|
if (sitemapMatch) {
|
|
443
446
|
result.sitemapUrls.push(sitemapMatch[1].trim());
|
|
444
447
|
}
|
|
448
|
+
const signalMatch = line.replace(/#.*$/, '').match(/^\s*Content-Signal\s*:\s*(.+)/i);
|
|
449
|
+
if (signalMatch) {
|
|
450
|
+
if (!result.contentSignals) result.contentSignals = {};
|
|
451
|
+
for (const pair of signalMatch[1].split(',')) {
|
|
452
|
+
const [k, v] = pair.split('=').map((s) => (s || '').trim().toLowerCase());
|
|
453
|
+
if (k && v) result.contentSignals[k] = v;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
445
456
|
}
|
|
446
457
|
|
|
447
458
|
// Build a minimal per-user-agent rule map.
|
|
@@ -1922,11 +1933,21 @@ function checkMeta($, currentUrl) {
|
|
|
1922
1933
|
// CHECK CATEGORY 6: Machine Readability
|
|
1923
1934
|
// ---------------------------------------------------------------------------
|
|
1924
1935
|
|
|
1925
|
-
function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders) {
|
|
1936
|
+
function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders, agentReadiness = null) {
|
|
1926
1937
|
const checks = [];
|
|
1927
1938
|
let score = 0;
|
|
1928
1939
|
let maxScore = 0;
|
|
1929
1940
|
|
|
1941
|
+
// Case-insensitive response header lookup (responseHeaders may use any casing).
|
|
1942
|
+
const getHeader = (name) => {
|
|
1943
|
+
if (!responseHeaders) return '';
|
|
1944
|
+
const lower = name.toLowerCase();
|
|
1945
|
+
for (const k of Object.keys(responseHeaders)) {
|
|
1946
|
+
if (k.toLowerCase() === lower) return String(responseHeaders[k] || '');
|
|
1947
|
+
}
|
|
1948
|
+
return '';
|
|
1949
|
+
};
|
|
1950
|
+
|
|
1930
1951
|
// Content in initial HTML (SSR check)
|
|
1931
1952
|
const mainEl = $('main, [role="main"], article');
|
|
1932
1953
|
const textContent = (mainEl.length > 0 ? mainEl.first().text() : $('body').text() || '').trim();
|
|
@@ -2058,7 +2079,7 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
|
|
|
2058
2079
|
|
|
2059
2080
|
// X-Robots-Tag header
|
|
2060
2081
|
if (responseHeaders) {
|
|
2061
|
-
const xRobotsTag =
|
|
2082
|
+
const xRobotsTag = getHeader('x-robots-tag');
|
|
2062
2083
|
if (xRobotsTag) {
|
|
2063
2084
|
if (xRobotsTag.includes('noindex')) {
|
|
2064
2085
|
checks.push({ status: 'fail', label: 'X-Robots-Tag header: NOINDEX', detail: `"${xRobotsTag}" - page won't be indexed via header!` });
|
|
@@ -2070,6 +2091,92 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
|
|
|
2070
2091
|
}
|
|
2071
2092
|
}
|
|
2072
2093
|
|
|
2094
|
+
// ── Agent-readiness discovery (emerging standards: bonus scoring, no penalty ──
|
|
2095
|
+
// for absence so the long tail of sites that haven't adopted them isn't punished).
|
|
2096
|
+
|
|
2097
|
+
// Content-Signal directive in robots.txt (search / ai-input / ai-train).
|
|
2098
|
+
if (robotsTxtData && robotsTxtData.contentSignals) {
|
|
2099
|
+
const cs = robotsTxtData.contentSignals;
|
|
2100
|
+
maxScore += 4;
|
|
2101
|
+
score += 4;
|
|
2102
|
+
const summary = Object.entries(cs).map(([k, v]) => `${k}=${v}`).join(', ');
|
|
2103
|
+
// Only ai-input=no hurts AI citation/answer visibility; ai-train=no is a
|
|
2104
|
+
// training-only preference (no citation impact), consistent with how Glippy
|
|
2105
|
+
// treats training vs citation crawlers elsewhere.
|
|
2106
|
+
if (cs['ai-input'] === 'no') {
|
|
2107
|
+
checks.push({ status: 'info', label: 'robots.txt Content-Signal present (restrictive)', detail: `Declares AI usage preferences: ${summary}. ai-input=no asks AI engines not to use the page for live answers.`, found: [summary] });
|
|
2108
|
+
} else {
|
|
2109
|
+
checks.push({ status: 'pass', label: 'robots.txt Content-Signal present', detail: `Machine-readable AI usage preferences: ${summary}`, found: [summary] });
|
|
2110
|
+
}
|
|
2111
|
+
} else if (robotsTxtData && robotsTxtData.exists) {
|
|
2112
|
+
checks.push({ status: 'info', label: 'No Content-Signal directive in robots.txt', detail: 'Add a Content-Signal line (e.g. "Content-Signal: search=yes, ai-input=yes, ai-train=no") to declare AI usage preferences (specification.website)' });
|
|
2113
|
+
}
|
|
2114
|
+
|
|
2115
|
+
// HTTP Link header discovery (rel=describedby / api-catalog / sitemap / mcp / service-desc).
|
|
2116
|
+
{
|
|
2117
|
+
const linkHeader = getHeader('link');
|
|
2118
|
+
const agentRels = ['describedby', 'api-catalog', 'sitemap', 'mcp', 'service-desc', 'nlweb'];
|
|
2119
|
+
const foundRels = agentRels.filter((rel) => new RegExp(`rel\\s*=\\s*"?${rel}\\b`, 'i').test(linkHeader));
|
|
2120
|
+
if (foundRels.length > 0) {
|
|
2121
|
+
maxScore += 4;
|
|
2122
|
+
score += 4;
|
|
2123
|
+
checks.push({ status: 'pass', label: `HTTP Link header discovery: ${foundRels.join(', ')}`, detail: 'Agents can discover resources from response headers without parsing HTML', found: foundRels });
|
|
2124
|
+
} else {
|
|
2125
|
+
checks.push({ status: 'info', label: 'No agent-discovery HTTP Link headers', detail: 'Expose discovery via Link headers, e.g. Link: </llms.txt>; rel="describedby"; type="text/markdown" (specification.website)' });
|
|
2126
|
+
}
|
|
2127
|
+
}
|
|
2128
|
+
|
|
2129
|
+
// Per-page Markdown source endpoint (link rel=alternate type=text/markdown, or content negotiation).
|
|
2130
|
+
{
|
|
2131
|
+
const mdLink = $('link[rel="alternate"][type="text/markdown"]').attr('href');
|
|
2132
|
+
const vary = getHeader('vary').toLowerCase();
|
|
2133
|
+
const contentLocation = getHeader('content-location');
|
|
2134
|
+
const negotiated = vary.includes('accept') && /\.md(\?|$)/i.test(contentLocation);
|
|
2135
|
+
if (mdLink || negotiated) {
|
|
2136
|
+
maxScore += 4;
|
|
2137
|
+
score += 4;
|
|
2138
|
+
checks.push({ status: 'pass', label: 'Markdown source endpoint advertised', detail: mdLink ? `<link rel="alternate" type="text/markdown" href="${mdLink}">` : 'Served via content negotiation (Vary: Accept + Content-Location .md)', found: mdLink ? [mdLink] : undefined });
|
|
2139
|
+
} else {
|
|
2140
|
+
checks.push({ status: 'info', label: 'No Markdown source endpoint', detail: 'Serve a .md version of each page and advertise it with <link rel="alternate" type="text/markdown"> for clean agent ingestion (specification.website)' });
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
|
|
2144
|
+
// llms-full.txt (concatenated markdown of the llms.txt pages).
|
|
2145
|
+
if (agentReadiness && agentReadiness.llmsFullTxt) {
|
|
2146
|
+
const lf = agentReadiness.llmsFullTxt;
|
|
2147
|
+
if (lf.exists && !lf.isHtml) {
|
|
2148
|
+
maxScore += 4;
|
|
2149
|
+
const tooBig = lf.sizeBytes > 5 * 1024 * 1024;
|
|
2150
|
+
if (tooBig) {
|
|
2151
|
+
score += 2;
|
|
2152
|
+
checks.push({ status: 'warn', label: 'llms-full.txt found but very large', detail: `${Math.round(lf.sizeBytes / 1024)} KB - over a couple of MB is suspect and may exceed agent context windows` });
|
|
2153
|
+
} else {
|
|
2154
|
+
score += 4;
|
|
2155
|
+
checks.push({ status: 'pass', label: 'llms-full.txt found', detail: `Concatenated markdown corpus for LLMs (${Math.round(lf.sizeBytes / 1024)} KB)` });
|
|
2156
|
+
}
|
|
2157
|
+
} else if (lf.exists && lf.isHtml) {
|
|
2158
|
+
maxScore += 4;
|
|
2159
|
+
checks.push({ status: 'warn', label: 'llms-full.txt served as HTML', detail: 'Serve llms-full.txt as text/markdown or text/plain, not HTML' });
|
|
2160
|
+
} else {
|
|
2161
|
+
checks.push({ status: 'info', label: 'No llms-full.txt found', detail: 'Add /llms-full.txt with the concatenated markdown of pages in llms.txt for full-context AI ingestion (specification.website)' });
|
|
2162
|
+
}
|
|
2163
|
+
}
|
|
2164
|
+
|
|
2165
|
+
// Feed discovery (RSS / Atom / JSON Feed) - machine-readable formats.
|
|
2166
|
+
{
|
|
2167
|
+
const feedSelectors = 'link[rel="alternate"][type="application/rss+xml"], link[rel="alternate"][type="application/atom+xml"], link[rel="alternate"][type="application/feed+json"], link[rel="alternate"][type="application/json"]';
|
|
2168
|
+
const feeds = $(feedSelectors);
|
|
2169
|
+
if (feeds.length > 0) {
|
|
2170
|
+
maxScore += 3;
|
|
2171
|
+
score += 3;
|
|
2172
|
+
const hrefs = [];
|
|
2173
|
+
feeds.each((_, el) => { const h = $(el).attr('href'); if (h) hrefs.push(h); });
|
|
2174
|
+
checks.push({ status: 'pass', label: `Feed discovery: ${feeds.length} feed(s)`, detail: 'RSS/Atom/JSON feeds give agents a machine-readable content stream', found: hrefs.slice(0, 5) });
|
|
2175
|
+
} else {
|
|
2176
|
+
checks.push({ status: 'info', label: 'No discoverable feed', detail: 'Advertise an RSS/Atom/JSON feed via <link rel="alternate"> for machine-readable content updates (specification.website)' });
|
|
2177
|
+
}
|
|
2178
|
+
}
|
|
2179
|
+
|
|
2073
2180
|
return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Machine Readability' };
|
|
2074
2181
|
}
|
|
2075
2182
|
|
|
@@ -3217,11 +3324,21 @@ function checkPerformance($) {
|
|
|
3217
3324
|
// (signing_keys, order webhook_url, etc. become required at this version).
|
|
3218
3325
|
const LATEST_UCP_VERSION = '2026-04-08';
|
|
3219
3326
|
|
|
3220
|
-
function checkWebMCP($, pageType, ucpData) {
|
|
3327
|
+
function checkWebMCP($, pageType, ucpData, responseHeaders = null, agentReadiness = null) {
|
|
3221
3328
|
const checks = [];
|
|
3222
3329
|
let score = 0;
|
|
3223
3330
|
let maxScore = 0;
|
|
3224
3331
|
|
|
3332
|
+
// Case-insensitive response header lookup.
|
|
3333
|
+
const getHeaderWebMCP = (name) => {
|
|
3334
|
+
if (!responseHeaders) return '';
|
|
3335
|
+
const lower = name.toLowerCase();
|
|
3336
|
+
for (const k of Object.keys(responseHeaders)) {
|
|
3337
|
+
if (k.toLowerCase() === lower) return String(responseHeaders[k] || '');
|
|
3338
|
+
}
|
|
3339
|
+
return '';
|
|
3340
|
+
};
|
|
3341
|
+
|
|
3225
3342
|
// ── CHECK 1: Declarative WebMCP Tool Detection (DOM-based) ──
|
|
3226
3343
|
const webmcpForms = $('form[toolname]');
|
|
3227
3344
|
const toolCount = webmcpForms.length;
|
|
@@ -3869,9 +3986,92 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3869
3986
|
checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
|
|
3870
3987
|
}
|
|
3871
3988
|
|
|
3989
|
+
// ══════════════════════════════════════════════════════
|
|
3990
|
+
// AGENT DISCOVERY SURFACES (specification.website Agent Readiness)
|
|
3991
|
+
// Emerging standards: bonus scoring (credit when present, info when absent).
|
|
3992
|
+
// ══════════════════════════════════════════════════════
|
|
3993
|
+
let hasDiscoverySurface = false;
|
|
3994
|
+
const ar = agentReadiness || {};
|
|
3995
|
+
const linkHeaderWebMCP = getHeaderWebMCP('link');
|
|
3996
|
+
|
|
3997
|
+
// MCP server discovery: /.well-known/mcp/server-card.json (+ Link rel="mcp").
|
|
3998
|
+
{
|
|
3999
|
+
const card = ar.mcpServerCard;
|
|
4000
|
+
const linkAdvertised = /rel\s*=\s*"?mcp\b/i.test(linkHeaderWebMCP);
|
|
4001
|
+
if (card && card.exists && card.valid) {
|
|
4002
|
+
hasDiscoverySurface = true;
|
|
4003
|
+
maxScore += 5; score += 5;
|
|
4004
|
+
checks.push({ status: 'pass', label: 'MCP server card found', detail: `/.well-known/mcp/server-card.json is published${linkAdvertised ? ' and advertised via Link header' : ''}` });
|
|
4005
|
+
} else if (card && card.exists) {
|
|
4006
|
+
maxScore += 5; score += 2;
|
|
4007
|
+
checks.push({ status: 'warn', label: 'MCP server card invalid JSON', detail: '/.well-known/mcp/server-card.json was reachable but did not parse as JSON' });
|
|
4008
|
+
} else {
|
|
4009
|
+
checks.push({ status: 'info', label: 'No MCP server card', detail: 'Publish /.well-known/mcp/server-card.json (name, version, transport, endpoint, tools) so agents can discover your MCP server (specification.website)' });
|
|
4010
|
+
}
|
|
4011
|
+
}
|
|
4012
|
+
|
|
4013
|
+
// A2A agent card: /.well-known/agent-card.json.
|
|
4014
|
+
{
|
|
4015
|
+
const card = ar.a2aAgentCard;
|
|
4016
|
+
if (card && card.exists && card.valid) {
|
|
4017
|
+
hasDiscoverySurface = true;
|
|
4018
|
+
maxScore += 4; score += 4;
|
|
4019
|
+
checks.push({ status: 'pass', label: 'A2A agent card found', detail: `/.well-known/agent-card.json is valid${card.skillsCount ? ` with ${card.skillsCount} skill(s)` : ''}` });
|
|
4020
|
+
} else if (card && card.exists) {
|
|
4021
|
+
maxScore += 4; score += 2;
|
|
4022
|
+
checks.push({ status: 'warn', label: 'A2A agent card incomplete', detail: 'agent-card.json is missing required fields (name, description, version) or skills' });
|
|
4023
|
+
} else {
|
|
4024
|
+
checks.push({ status: 'info', label: 'No A2A agent card', detail: 'Publish /.well-known/agent-card.json to let other agents discover and call your services (specification.website)' });
|
|
4025
|
+
}
|
|
4026
|
+
}
|
|
4027
|
+
|
|
4028
|
+
// Agent Skills discovery: /.well-known/agent-skills/index.json.
|
|
4029
|
+
{
|
|
4030
|
+
const sk = ar.agentSkills;
|
|
4031
|
+
if (sk && sk.exists && sk.valid && sk.schemaOk) {
|
|
4032
|
+
hasDiscoverySurface = true;
|
|
4033
|
+
maxScore += 4; score += 4;
|
|
4034
|
+
checks.push({ status: 'pass', label: 'Agent Skills index found', detail: `/.well-known/agent-skills/index.json published with ${sk.skillsCount} skill(s)` });
|
|
4035
|
+
} else if (sk && sk.exists) {
|
|
4036
|
+
maxScore += 4; score += 2;
|
|
4037
|
+
checks.push({ status: 'warn', label: 'Agent Skills index incomplete', detail: 'index.json should set $schema to the agentskills discovery schema and list skills with digests' });
|
|
4038
|
+
} else {
|
|
4039
|
+
checks.push({ status: 'info', label: 'No Agent Skills discovery', detail: 'Publish /.well-known/agent-skills/index.json to expose reusable agent skills (specification.website)' });
|
|
4040
|
+
}
|
|
4041
|
+
}
|
|
4042
|
+
|
|
4043
|
+
// NLWeb conversational endpoint (link rel="nlweb" or Link header).
|
|
4044
|
+
{
|
|
4045
|
+
const nlwebLink = $('link[rel="nlweb"]').attr('href');
|
|
4046
|
+
const nlwebHeader = /rel\s*=\s*"?nlweb\b/i.test(linkHeaderWebMCP);
|
|
4047
|
+
if (nlwebLink || nlwebHeader) {
|
|
4048
|
+
hasDiscoverySurface = true;
|
|
4049
|
+
maxScore += 3; score += 3;
|
|
4050
|
+
checks.push({ status: 'pass', label: 'NLWeb endpoint advertised', detail: nlwebLink ? `<link rel="nlweb" href="${nlwebLink}">` : 'Advertised via Link: rel="nlweb"' });
|
|
4051
|
+
} else {
|
|
4052
|
+
checks.push({ status: 'info', label: 'No NLWeb endpoint', detail: 'Expose a natural-language query endpoint (by convention /ask) and advertise it with <link rel="nlweb"> (specification.website)' });
|
|
4053
|
+
}
|
|
4054
|
+
}
|
|
4055
|
+
|
|
4056
|
+
// Schemamap: /schemamap.xml + per-resource JSON-LD endpoints (link rel="schemamap").
|
|
4057
|
+
{
|
|
4058
|
+
const schemamapLink = $('link[rel="schemamap"]').attr('href');
|
|
4059
|
+
const sm = ar.schemamap;
|
|
4060
|
+
if ((sm && sm.exists && sm.valid) || schemamapLink) {
|
|
4061
|
+
hasDiscoverySurface = true;
|
|
4062
|
+
maxScore += 3; score += 3;
|
|
4063
|
+
const detail = sm && sm.exists
|
|
4064
|
+
? `/schemamap.xml published${sm.resourceCount ? ` with ${sm.resourceCount} resource(s)` : ''}`
|
|
4065
|
+
: `Advertised via <link rel="schemamap" href="${schemamapLink}">`;
|
|
4066
|
+
checks.push({ status: 'pass', label: 'Schemamap found', detail });
|
|
4067
|
+
} else {
|
|
4068
|
+
checks.push({ status: 'info', label: 'No schemamap', detail: 'Publish /schemamap.xml listing per-resource JSON-LD (.jsonld) endpoints for agent-friendly structured data (specification.website)' });
|
|
4069
|
+
}
|
|
4070
|
+
}
|
|
4071
|
+
|
|
3872
4072
|
// Baseline credit for purely informational pages.
|
|
3873
|
-
// If the page has no forms, no WebMCP signals, no UCP profile,
|
|
3874
|
-
// surface, there's nothing for it to expose to agents
|
|
4073
|
+
// If the page has no forms, no WebMCP signals, no UCP profile, no discovery
|
|
4074
|
+
// surface, and no Shopify surface, there's nothing for it to expose to agents.
|
|
3875
4075
|
// Without this, content-only pages are capped well below 100 even when there's
|
|
3876
4076
|
// nothing to fix, dragging the overall score unfairly.
|
|
3877
4077
|
const totalForms = $('form').length;
|
|
@@ -3884,7 +4084,8 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3884
4084
|
!webmcpSDKFound &&
|
|
3885
4085
|
!hasSchemaActions &&
|
|
3886
4086
|
!hasUcp &&
|
|
3887
|
-
!hasShopify
|
|
4087
|
+
!hasShopify &&
|
|
4088
|
+
!hasDiscoverySurface;
|
|
3888
4089
|
|
|
3889
4090
|
if (hasNoInteractiveSurface) {
|
|
3890
4091
|
checks.push({
|
|
@@ -4993,7 +5194,7 @@ function checkMultimodal($, jsonLdData) {
|
|
|
4993
5194
|
* hasStructuredData: boolean
|
|
4994
5195
|
* }}
|
|
4995
5196
|
*/
|
|
4996
|
-
function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders, pathname = '/', ucpData = null) {
|
|
5197
|
+
function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders, pathname = '/', ucpData = null, agentReadiness = null) {
|
|
4997
5198
|
const result = {
|
|
4998
5199
|
pageType: 'generic',
|
|
4999
5200
|
categories: [],
|
|
@@ -5085,11 +5286,11 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
|
|
|
5085
5286
|
checkAccessibility($),
|
|
5086
5287
|
checkInternalLinking($, domain),
|
|
5087
5288
|
checkMeta($, currentUrl),
|
|
5088
|
-
checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders),
|
|
5289
|
+
checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders, agentReadiness),
|
|
5089
5290
|
checkEntity($, jsonLdData),
|
|
5090
5291
|
checkCitability($),
|
|
5091
5292
|
checkPerformance($),
|
|
5092
|
-
checkWebMCP($, pageType, ucpData),
|
|
5293
|
+
checkWebMCP($, pageType, ucpData, responseHeaders, agentReadiness),
|
|
5093
5294
|
checkContentPositioning($),
|
|
5094
5295
|
checkContentFreshness($, jsonLdData),
|
|
5095
5296
|
checkInformationDensity($),
|
|
@@ -5222,6 +5423,14 @@ async function checkGEO(domain, options = {}) {
|
|
|
5222
5423
|
content: null,
|
|
5223
5424
|
error: null,
|
|
5224
5425
|
},
|
|
5426
|
+
// Agent-readiness discovery surfaces (specification.website Agent Readiness).
|
|
5427
|
+
agentReadiness: {
|
|
5428
|
+
llmsFullTxt: { exists: false, url: null, sizeBytes: 0, isHtml: false },
|
|
5429
|
+
mcpServerCard: { exists: false, url: null, valid: false },
|
|
5430
|
+
a2aAgentCard: { exists: false, url: null, valid: false, skillsCount: 0 },
|
|
5431
|
+
agentSkills: { exists: false, url: null, valid: false, schemaOk: false, skillsCount: 0 },
|
|
5432
|
+
schemamap: { exists: false, url: null, valid: false, resourceCount: 0 },
|
|
5433
|
+
},
|
|
5225
5434
|
securityHeaders: {},
|
|
5226
5435
|
// Multi-page crawl results
|
|
5227
5436
|
multiPageCrawl: {
|
|
@@ -5266,6 +5475,12 @@ async function checkGEO(domain, options = {}) {
|
|
|
5266
5475
|
const homepageUrl = `${baseUrl}/`;
|
|
5267
5476
|
const sitemapUrl = `${baseUrl}/sitemap.xml`;
|
|
5268
5477
|
const ucpUrl = `${baseUrl}/.well-known/ucp`;
|
|
5478
|
+
// Agent-readiness discovery resources (specification.website / Agent Readiness).
|
|
5479
|
+
const llmsFullUrl = `${baseUrl}/llms-full.txt`;
|
|
5480
|
+
const mcpCardUrl = `${baseUrl}/.well-known/mcp/server-card.json`;
|
|
5481
|
+
const agentCardUrl = `${baseUrl}/.well-known/agent-card.json`;
|
|
5482
|
+
const agentSkillsUrl = `${baseUrl}/.well-known/agent-skills/index.json`;
|
|
5483
|
+
const schemamapUrl = `${baseUrl}/schemamap.xml`;
|
|
5269
5484
|
|
|
5270
5485
|
output.robotsTxt.url = robotsUrl;
|
|
5271
5486
|
output.llmsTxt.url = llmsUrl;
|
|
@@ -5274,9 +5489,11 @@ async function checkGEO(domain, options = {}) {
|
|
|
5274
5489
|
output.ucpProfile.url = ucpUrl;
|
|
5275
5490
|
|
|
5276
5491
|
let robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes;
|
|
5492
|
+
let llmsFullRes, mcpCardRes, agentCardRes, agentSkillsRes, schemamapRes;
|
|
5277
5493
|
|
|
5278
5494
|
try {
|
|
5279
|
-
[robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes
|
|
5495
|
+
[robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes,
|
|
5496
|
+
llmsFullRes, mcpCardRes, agentCardRes, agentSkillsRes, schemamapRes] = await Promise.all([
|
|
5280
5497
|
throttledFetchUrl(robotsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5281
5498
|
throttledFetchUrl(llmsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5282
5499
|
renderMode === 'chrome'
|
|
@@ -5284,6 +5501,11 @@ async function checkGEO(domain, options = {}) {
|
|
|
5284
5501
|
: throttledFetchUrl(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5285
5502
|
throttledFetchUrl(sitemapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5286
5503
|
throttledFetchUrl(ucpUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5504
|
+
throttledFetchUrl(llmsFullUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5505
|
+
throttledFetchUrl(mcpCardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5506
|
+
throttledFetchUrl(agentCardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5507
|
+
throttledFetchUrl(agentSkillsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5508
|
+
throttledFetchUrl(schemamapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
5287
5509
|
]);
|
|
5288
5510
|
} catch (err) {
|
|
5289
5511
|
output.error = `Failed to fetch resources: ${err.message}`;
|
|
@@ -5324,6 +5546,7 @@ async function checkGEO(domain, options = {}) {
|
|
|
5324
5546
|
output.robotsTxt.blocksCrawlers = analysis.blocksCrawlers;
|
|
5325
5547
|
output.robotsTxt.hasWildcardDisallow = analysis.hasWildcardDisallow;
|
|
5326
5548
|
output.robotsTxt.sitemapReferences = analysis.sitemapUrls;
|
|
5549
|
+
output.robotsTxt.contentSignals = analysis.contentSignals;
|
|
5327
5550
|
}
|
|
5328
5551
|
} catch (err) {
|
|
5329
5552
|
output.robotsTxt.error = err.message;
|
|
@@ -5339,6 +5562,66 @@ async function checkGEO(domain, options = {}) {
|
|
|
5339
5562
|
output.llmsTxt.error = err.message;
|
|
5340
5563
|
}
|
|
5341
5564
|
|
|
5565
|
+
// --- Agent-readiness discovery surfaces ---
|
|
5566
|
+
try {
|
|
5567
|
+
const ar = output.agentReadiness;
|
|
5568
|
+
|
|
5569
|
+
// /llms-full.txt: concatenated markdown of the pages in llms.txt.
|
|
5570
|
+
ar.llmsFullTxt.url = llmsFullUrl;
|
|
5571
|
+
if (llmsFullRes.statusCode === 200 && llmsFullRes.body) {
|
|
5572
|
+
const body = llmsFullRes.body;
|
|
5573
|
+
const trimmed = body.trimStart().toLowerCase();
|
|
5574
|
+
ar.llmsFullTxt.exists = true;
|
|
5575
|
+
ar.llmsFullTxt.sizeBytes = Buffer.byteLength(body);
|
|
5576
|
+
ar.llmsFullTxt.isHtml = trimmed.startsWith('<!') || trimmed.startsWith('<html');
|
|
5577
|
+
}
|
|
5578
|
+
|
|
5579
|
+
// /.well-known/mcp/server-card.json: MCP server discovery.
|
|
5580
|
+
ar.mcpServerCard.url = mcpCardUrl;
|
|
5581
|
+
if (mcpCardRes.statusCode === 200 && mcpCardRes.body) {
|
|
5582
|
+
ar.mcpServerCard.exists = true;
|
|
5583
|
+
try { JSON.parse(mcpCardRes.body); ar.mcpServerCard.valid = true; } catch { /* invalid json */ }
|
|
5584
|
+
}
|
|
5585
|
+
|
|
5586
|
+
// /.well-known/agent-card.json: A2A agent card.
|
|
5587
|
+
ar.a2aAgentCard.url = agentCardUrl;
|
|
5588
|
+
if (agentCardRes.statusCode === 200 && agentCardRes.body) {
|
|
5589
|
+
ar.a2aAgentCard.exists = true;
|
|
5590
|
+
try {
|
|
5591
|
+
const card = JSON.parse(agentCardRes.body);
|
|
5592
|
+
const requiredOk = !!(card && card.name && card.description && (card.version || card.protocolVersion));
|
|
5593
|
+
ar.a2aAgentCard.valid = requiredOk;
|
|
5594
|
+
ar.a2aAgentCard.skillsCount = Array.isArray(card && card.skills) ? card.skills.length : 0;
|
|
5595
|
+
} catch { /* invalid json */ }
|
|
5596
|
+
}
|
|
5597
|
+
|
|
5598
|
+
// /.well-known/agent-skills/index.json: Agent Skills discovery.
|
|
5599
|
+
ar.agentSkills.url = agentSkillsUrl;
|
|
5600
|
+
if (agentSkillsRes.statusCode === 200 && agentSkillsRes.body) {
|
|
5601
|
+
ar.agentSkills.exists = true;
|
|
5602
|
+
try {
|
|
5603
|
+
const idx = JSON.parse(agentSkillsRes.body);
|
|
5604
|
+
ar.agentSkills.valid = true;
|
|
5605
|
+
ar.agentSkills.schemaOk = typeof idx.$schema === 'string' && idx.$schema.includes('agentskills');
|
|
5606
|
+
ar.agentSkills.skillsCount = Array.isArray(idx.skills) ? idx.skills.length : 0;
|
|
5607
|
+
} catch { /* invalid json */ }
|
|
5608
|
+
}
|
|
5609
|
+
|
|
5610
|
+
// /schemamap.xml: discoverable JSON-LD endpoints per resource.
|
|
5611
|
+
ar.schemamap.url = schemamapUrl;
|
|
5612
|
+
if (schemamapRes.statusCode === 200 && schemamapRes.body) {
|
|
5613
|
+
ar.schemamap.exists = true;
|
|
5614
|
+
const body = schemamapRes.body;
|
|
5615
|
+
if (body.includes('<schemamap') || body.includes('<resource')) {
|
|
5616
|
+
ar.schemamap.valid = true;
|
|
5617
|
+
const matches = body.match(/<resource[\s>]/g);
|
|
5618
|
+
ar.schemamap.resourceCount = matches ? matches.length : 0;
|
|
5619
|
+
}
|
|
5620
|
+
}
|
|
5621
|
+
} catch (err) {
|
|
5622
|
+
// Non-critical: leave defaults.
|
|
5623
|
+
}
|
|
5624
|
+
|
|
5342
5625
|
// --- /.well-known/ucp ---
|
|
5343
5626
|
try {
|
|
5344
5627
|
if (ucpRes.statusCode === 200 && ucpRes.body) {
|
|
@@ -5410,6 +5693,7 @@ async function checkGEO(domain, options = {}) {
|
|
|
5410
5693
|
homepageRes.headers || {},
|
|
5411
5694
|
'/',
|
|
5412
5695
|
output.ucpProfile,
|
|
5696
|
+
output.agentReadiness,
|
|
5413
5697
|
);
|
|
5414
5698
|
} else {
|
|
5415
5699
|
output.homepage.error =
|
|
@@ -5511,6 +5795,7 @@ async function checkGEO(domain, options = {}) {
|
|
|
5511
5795
|
res.headers || {},
|
|
5512
5796
|
pathname,
|
|
5513
5797
|
output.ucpProfile,
|
|
5798
|
+
output.agentReadiness,
|
|
5514
5799
|
);
|
|
5515
5800
|
return { url: pageUrl, analysis, error: null };
|
|
5516
5801
|
}
|
package/src/index.js
CHANGED
|
@@ -1609,7 +1609,7 @@ function bulkHTMLScript() {
|
|
|
1609
1609
|
|
|
1610
1610
|
const server = new McpServer({
|
|
1611
1611
|
name: "glippy-geo",
|
|
1612
|
-
version: "0.
|
|
1612
|
+
version: "0.4.0",
|
|
1613
1613
|
});
|
|
1614
1614
|
|
|
1615
1615
|
// ---------------------------------------------------------------------------
|
|
@@ -2137,15 +2137,16 @@ server.tool(
|
|
|
2137
2137
|
"Analyse multiple domains in parallel and compare their GEO scores side by side. " +
|
|
2138
2138
|
"Returns a comparison table with overall scores, per-category breakdowns, and a ranked summary. " +
|
|
2139
2139
|
"Useful for competitive analysis or auditing a portfolio of sites. " +
|
|
2140
|
+
"Accepts up to 50 domains per call - for larger lists, split them across multiple runs and merge the results. " +
|
|
2140
2141
|
"Requires Pro or Agency tier. " +
|
|
2141
2142
|
"Use output_format='json' to get raw results that can be passed to export_bulk_report.",
|
|
2142
2143
|
{
|
|
2143
2144
|
domains: z
|
|
2144
2145
|
.array(z.string())
|
|
2145
2146
|
.min(2)
|
|
2146
|
-
.max(
|
|
2147
|
+
.max(50, "compare_domains accepts at most 50 domains per call. Split larger lists across multiple runs and merge the results.")
|
|
2147
2148
|
.describe(
|
|
2148
|
-
'List of domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix.'
|
|
2149
|
+
'List of 2-50 domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix. For more than 50 domains, run multiple times and combine the output.'
|
|
2149
2150
|
),
|
|
2150
2151
|
max_pages: z
|
|
2151
2152
|
.number()
|
|
@@ -2178,15 +2179,22 @@ server.tool(
|
|
|
2178
2179
|
const maxPages = max_pages ?? 10;
|
|
2179
2180
|
const renderMode = render_mode ?? "static";
|
|
2180
2181
|
|
|
2181
|
-
//
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2182
|
+
// Cap concurrent domain analyses so a 50-domain × 10-page run does not
|
|
2183
|
+
// fan out into 500 simultaneous fetches.
|
|
2184
|
+
const DOMAIN_CONCURRENCY = 10;
|
|
2185
|
+
const results = [];
|
|
2186
|
+
for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
|
|
2187
|
+
const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
|
|
2188
|
+
const batchResults = await Promise.allSettled(
|
|
2189
|
+
batch.map((domain) =>
|
|
2190
|
+
checkGEO(domain, { maxPages, renderMode }).then((result) => ({
|
|
2191
|
+
domain,
|
|
2192
|
+
result,
|
|
2193
|
+
}))
|
|
2194
|
+
)
|
|
2195
|
+
);
|
|
2196
|
+
results.push(...batchResults);
|
|
2197
|
+
}
|
|
2190
2198
|
|
|
2191
2199
|
// JSON output mode - return raw results for use with export_bulk_report
|
|
2192
2200
|
if (output_format === "json") {
|
|
@@ -3085,10 +3093,10 @@ server.tool(
|
|
|
3085
3093
|
domains: z
|
|
3086
3094
|
.array(z.string())
|
|
3087
3095
|
.min(2)
|
|
3088
|
-
.max(
|
|
3096
|
+
.max(50, "export_bulk_report accepts at most 50 domains per call. Split larger lists across multiple runs.")
|
|
3089
3097
|
.optional()
|
|
3090
3098
|
.describe(
|
|
3091
|
-
'Compare
|
|
3099
|
+
'Compare 2-50 domains. E.g. ["example.com", "competitor.com"]. Do not include https://. For more than 50, run multiple times.'
|
|
3092
3100
|
),
|
|
3093
3101
|
urls: z
|
|
3094
3102
|
.array(z.string())
|
|
@@ -3258,14 +3266,20 @@ server.tool(
|
|
|
3258
3266
|
// ------------------------------------------------------------------
|
|
3259
3267
|
if (domains) {
|
|
3260
3268
|
const maxPages = max_pages ?? 10;
|
|
3261
|
-
const
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
+
const DOMAIN_CONCURRENCY = 10;
|
|
3270
|
+
const results = [];
|
|
3271
|
+
for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
|
|
3272
|
+
const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
|
|
3273
|
+
const batchResults = await Promise.allSettled(
|
|
3274
|
+
batch.map((domain) =>
|
|
3275
|
+
checkGEO(domain, { maxPages, renderMode }).then((result) => ({
|
|
3276
|
+
domain,
|
|
3277
|
+
result,
|
|
3278
|
+
}))
|
|
3279
|
+
)
|
|
3280
|
+
);
|
|
3281
|
+
results.push(...batchResults);
|
|
3282
|
+
}
|
|
3269
3283
|
|
|
3270
3284
|
const entries = [];
|
|
3271
3285
|
for (const r of results) {
|