glippy-mcp 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +7 -7
- package/package.json +2 -2
- package/src/geo-checker.js +350 -56
- package/src/index.js +36 -22
package/LICENSE
CHANGED
|
@@ -14,7 +14,7 @@ you a non-exclusive, non-transferable, revocable license to install and
|
|
|
14
14
|
use the Software solely for your own internal business or personal use.
|
|
15
15
|
|
|
16
16
|
A valid, paid license key (format: GLMCP-XXXX-XXXX-XXXX) is required to
|
|
17
|
-
use the Software. License keys may be purchased at https://glippy.dev.
|
|
17
|
+
use the Software. License keys may be purchased at https://www.glippy.dev.
|
|
18
18
|
Running the Software without a valid license key, or in a manner that
|
|
19
19
|
circumvents license verification, is not permitted.
|
|
20
20
|
|
|
@@ -61,4 +61,4 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
61
61
|
This Agreement is governed by the laws of the Netherlands, without regard
|
|
62
62
|
to its conflict of laws principles.
|
|
63
63
|
|
|
64
|
-
For license purchases or enquiries: https://glippy.dev
|
|
64
|
+
For license purchases or enquiries: https://www.glippy.dev
|
package/README.md
CHANGED
|
@@ -78,7 +78,7 @@ npx -y glippy-mcp
|
|
|
78
78
|
|
|
79
79
|
### License Key
|
|
80
80
|
|
|
81
|
-
A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://glippy.dev).
|
|
81
|
+
A valid Glippy MCP license key (`GLMCP-XXXX-XXXX-XXXX`) is required. Get one at [glippy.dev](https://www.glippy.dev).
|
|
82
82
|
|
|
83
83
|
The server validates the key against the Glippy API on first use and caches the result for 24 hours. **Analysis runs locally on your machine** — only the license check calls the server.
|
|
84
84
|
|
|
@@ -300,7 +300,7 @@ Analyse multiple domains in parallel and compare scores.
|
|
|
300
300
|
|
|
301
301
|
| Parameter | Type | Required | Description |
|
|
302
302
|
|-----------|------|----------|-------------|
|
|
303
|
-
| `domains` | array[string] | Yes | List of 2-
|
|
303
|
+
| `domains` | array[string] | Yes | List of 2-50 domains to compare, e.g. `["example.com", "competitor.com"]`. Do not include `https://` prefix. For more than 50 domains, split across multiple runs and merge the results. |
|
|
304
304
|
| `max_pages` | integer | No | Maximum pages to crawl per domain (1-10). Default: `10`. |
|
|
305
305
|
| `render_mode` | enum | No | `"static"` (default), `"auto"` (static with Chrome fallback on bot-block), or `"chrome"` (always Chrome). See [Chrome Rendering Fallback](#chrome-rendering-fallback). |
|
|
306
306
|
| `output_format` | enum | No | `"text"` (default) for comparison table, `"json"` for raw results to pass to `export_bulk_report`. |
|
|
@@ -428,7 +428,7 @@ Generate a styled report for bulk analysis.
|
|
|
428
428
|
| Parameter | Type | Required | Description |
|
|
429
429
|
|-----------|------|----------|-------------|
|
|
430
430
|
| `format` | enum | Yes | Report format: `"markdown"` or `"html"` |
|
|
431
|
-
| `domains` | array[string] | No* | Compare 2-
|
|
431
|
+
| `domains` | array[string] | No* | Compare 2-50 domains. Do not include `https://`. For more than 50, run multiple times. |
|
|
432
432
|
| `urls` | array[string] | No* | Analyse 1-50,000 specific URLs. Include `https://`. |
|
|
433
433
|
| `sitemap_url` | string | No* | Crawl a sitemap URL. |
|
|
434
434
|
| `analysis_results` | object | No* | Pre-computed results from `compare_domains`, `analyze_urls`, or `analyze_sitemap` (with `output_format="json"`). |
|
|
@@ -734,7 +734,7 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":
|
|
|
734
734
|
|
|
735
735
|
**Cause:** Invalid or expired license key.
|
|
736
736
|
|
|
737
|
-
**Fix:** Get a valid key at [glippy.dev](https://glippy.dev).
|
|
737
|
+
**Fix:** Get a valid key at [glippy.dev](https://www.glippy.dev).
|
|
738
738
|
|
|
739
739
|
### "Could not reach license server"
|
|
740
740
|
|
|
@@ -795,16 +795,16 @@ The server checks access rules for these AI crawlers in robots.txt:
|
|
|
795
795
|
|
|
796
796
|
## License
|
|
797
797
|
|
|
798
|
-
See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://glippy.dev).
|
|
798
|
+
See LICENSE file for licensing terms. Get your license key at [glippy.dev](https://www.glippy.dev).
|
|
799
799
|
|
|
800
800
|
---
|
|
801
801
|
|
|
802
802
|
## Support
|
|
803
803
|
|
|
804
804
|
- **Integration Guide:** [docs/INTEGRATIONS.md](docs/INTEGRATIONS.md)
|
|
805
|
-
- **Online Documentation:** [glippy.dev/docs](https://glippy.dev)
|
|
805
|
+
- **Online Documentation:** [glippy.dev/docs](https://www.glippy.dev)
|
|
806
806
|
- **Issues:** [github.com/jbobbink/glippy/issues](https://github.com/jbobbink/glippy/issues)
|
|
807
|
-
- **Homepage:** [glippy.dev](https://glippy.dev)
|
|
807
|
+
- **Homepage:** [glippy.dev](https://www.glippy.dev)
|
|
808
808
|
|
|
809
809
|
---
|
|
810
810
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "glippy-mcp",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.3",
|
|
4
4
|
"description": "MCP server for GEO (Generative Engine Optimization) analysis — check any domain's AI-readiness",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"claude",
|
|
27
27
|
"glippy"
|
|
28
28
|
],
|
|
29
|
-
"homepage": "https://glippy.dev",
|
|
29
|
+
"homepage": "https://www.glippy.dev",
|
|
30
30
|
"repository": {
|
|
31
31
|
"type": "git",
|
|
32
32
|
"url": "git+https://github.com/jbobbink/glippy.git"
|
package/src/geo-checker.js
CHANGED
|
@@ -31,22 +31,105 @@ function looksBotBlocked(res) {
|
|
|
31
31
|
const FETCH_TIMEOUT_MS = 15_000;
|
|
32
32
|
|
|
33
33
|
/**
|
|
34
|
-
*
|
|
35
|
-
*
|
|
34
|
+
* Training-only crawlers. Blocking these is informational: it keeps content
|
|
35
|
+
* out of LLM training corpora but does not affect AI citation surfaces.
|
|
36
36
|
*/
|
|
37
|
-
const
|
|
37
|
+
const TRAINING_CRAWLERS = Object.freeze([
|
|
38
38
|
'GPTBot',
|
|
39
|
-
'Google-Extended',
|
|
40
|
-
'CCBot',
|
|
41
|
-
'anthropic-ai',
|
|
42
39
|
'ClaudeBot',
|
|
40
|
+
'anthropic-ai',
|
|
41
|
+
'CCBot',
|
|
42
|
+
'Google-Extended',
|
|
43
|
+
'Applebot-Extended',
|
|
43
44
|
'Bytespider',
|
|
44
|
-
'
|
|
45
|
-
'
|
|
46
|
-
'AmazonBot',
|
|
45
|
+
'FacebookBot',
|
|
46
|
+
'Meta-ExternalAgent',
|
|
47
47
|
'cohere-ai',
|
|
48
|
+
'Diffbot',
|
|
49
|
+
'Omgili',
|
|
50
|
+
'Amazonbot',
|
|
51
|
+
'Timpibot',
|
|
52
|
+
'ImageSiftBot',
|
|
53
|
+
// Broadened: SEO/search/training crawlers commonly named in robots.txt.
|
|
54
|
+
'PetalBot',
|
|
55
|
+
'MJ12bot',
|
|
56
|
+
'AwarioBot',
|
|
57
|
+
'AhrefsBot',
|
|
58
|
+
'SemrushBot',
|
|
59
|
+
'DotBot',
|
|
60
|
+
'SeznamBot',
|
|
61
|
+
'magpie-crawler',
|
|
62
|
+
'DataForSeoBot',
|
|
63
|
+
'iaskbot',
|
|
64
|
+
'Pangu_Bot',
|
|
65
|
+
'claude-web',
|
|
66
|
+
'cohere-training-data-crawler',
|
|
67
|
+
'meta-externalfetcher',
|
|
68
|
+
]);
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Citation/retrieval crawlers. Blocking these directly hurts AI visibility
|
|
72
|
+
* because answer engines cannot fetch content for inline citation.
|
|
73
|
+
*/
|
|
74
|
+
const CITATION_CRAWLERS = Object.freeze([
|
|
75
|
+
'OAI-SearchBot',
|
|
76
|
+
'ChatGPT-User',
|
|
77
|
+
'PerplexityBot',
|
|
78
|
+
'Perplexity-User',
|
|
79
|
+
'Applebot',
|
|
80
|
+
'Bingbot',
|
|
81
|
+
'Googlebot',
|
|
82
|
+
'DuckDuckBot',
|
|
83
|
+
'YouBot',
|
|
84
|
+
// Broadened: alternative answer engines and search crawlers.
|
|
85
|
+
'MistralAI-User',
|
|
86
|
+
'PhindBot',
|
|
87
|
+
'Komo',
|
|
88
|
+
'AndiBot',
|
|
89
|
+
'BraveBot',
|
|
90
|
+
'KagiBot',
|
|
91
|
+
'Yep',
|
|
92
|
+
'NeevaBot',
|
|
93
|
+
'Exabot',
|
|
94
|
+
'Qwantify',
|
|
95
|
+
'Seznam',
|
|
96
|
+
'GoogleOther',
|
|
97
|
+
'Google-CloudVertexBot',
|
|
98
|
+
'BingPreview',
|
|
48
99
|
]);
|
|
49
100
|
|
|
101
|
+
/**
 * Find the known crawler that best matches a User-Agent token.
 *
 * Matching is a case-insensitive substring test: a crawler matches when its
 * name appears anywhere in `ua`, which also covers exact and prefix matches.
 * When several crawlers match, the longest name wins, so "applebot-extended"
 * resolves to "Applebot-Extended" rather than "Applebot" when both names are
 * present in `crawlers`. Ties on length keep the earliest entry in `crawlers`.
 *
 * @param {string} ua - User-Agent token from robots.txt or meta tag.
 * @param {readonly string[]} crawlers - Crawler list to match against.
 * @returns {string|null} - The matched crawler name (original casing), or null
 *   when `ua` is empty or not a string, `crawlers` is missing, or nothing matches.
 */
function matchCrawler(ua, crawlers) {
  // Guard against non-string input so callers get null instead of a TypeError.
  if (typeof ua !== 'string' || ua === '' || !crawlers) return null;

  const haystack = ua.toLowerCase();
  let bestMatch = null;
  let bestLen = 0;

  for (const candidate of crawlers) {
    const needle = candidate.toLowerCase();
    // Only a strictly longer name can replace the current best, which keeps
    // the first of equally long matches and ignores empty crawler names.
    if (needle.length > bestLen && haystack.includes(needle)) {
      bestLen = needle.length;
      bestMatch = candidate;
    }
  }

  return bestMatch;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Combined AI crawler list (training crawlers first, then citation crawlers),
 * kept for back-compat with downstream callers that iterate the union
 * (e.g. robots.txt block detection per crawler).
 *
 * The union is de-duplicated so a name that appears in both category lists is
 * visited only once. First-occurrence order is preserved, so the result is
 * identical to plain concatenation whenever the two lists are disjoint.
 */
const AI_CRAWLERS = Object.freeze([...new Set([...TRAINING_CRAWLERS, ...CITATION_CRAWLERS])]);
|
|
132
|
+
|
|
50
133
|
/** Maximum number of redirects to follow when fetching a resource. */
|
|
51
134
|
const MAX_REDIRECTS = 5;
|
|
52
135
|
|
|
@@ -759,7 +842,7 @@ function aggregatePageScores(pageResults) {
|
|
|
759
842
|
function detectPageType($, schemaTypes, pathname) {
|
|
760
843
|
// Check JSON-LD schema types first (most reliable signal).
|
|
761
844
|
// A page can carry FAQPage schema for a small FAQ section while being a long-form
|
|
762
|
-
// guide. Only classify as "faq" when FAQPage is the dominant structure
|
|
845
|
+
// guide. Only classify as "faq" when FAQPage is the dominant structure; otherwise
|
|
763
846
|
// a 6,400-word guide with a FAQ at the bottom gets penalized as exceeding FAQ length.
|
|
764
847
|
const allH2s = $('h2');
|
|
765
848
|
const h2Count = allH2s.length;
|
|
@@ -772,11 +855,52 @@ function detectPageType($, schemaTypes, pathname) {
|
|
|
772
855
|
});
|
|
773
856
|
const isDominantlyFaq = h2Count > 0 && questionH2Count >= h2Count * 0.7;
|
|
774
857
|
|
|
775
|
-
|
|
858
|
+
// Word count for length-based reclassification of FAQ-tagged guides.
|
|
859
|
+
const mainElForCount = $('main, article, [role="main"]');
|
|
860
|
+
const mainTextForCount = (mainElForCount.length > 0 ? mainElForCount.text() : $('body').text() || '').trim();
|
|
861
|
+
const wordCountForType = mainTextForCount.split(/\s+/).filter(w => w.length > 0).length;
|
|
862
|
+
|
|
863
|
+
// Non-FAQ schema types that, when coexisting with FAQPage, signal a hybrid
|
|
864
|
+
// guide rather than a pure FAQ page.
|
|
865
|
+
const NON_FAQ_GUIDE_TYPES = [
|
|
866
|
+
'Article', 'NewsArticle', 'BlogPosting', 'TechArticle', 'HowTo', 'Product',
|
|
867
|
+
'Dataset', 'Report', 'WebPage',
|
|
868
|
+
// Broadened: more schema types that imply guide/long-form rather than pure FAQ.
|
|
869
|
+
'Recipe', 'ScholarlyArticle', 'Guide', 'Course', 'Service',
|
|
870
|
+
'MedicalEntity', 'MedicalGuideline', 'Book', 'Chapter',
|
|
871
|
+
'LearningResource', 'Review', 'CollectionPage', 'ItemPage',
|
|
872
|
+
];
|
|
873
|
+
const hasNonFaqGuideType = NON_FAQ_GUIDE_TYPES.some((t) => schemaTypes.has(t));
|
|
874
|
+
|
|
875
|
+
// Heuristic guide-title overrides: title or H1 phrasing strongly implies a guide.
|
|
876
|
+
const titleText = ($('title').text() || '').trim();
|
|
877
|
+
const h1Text = ($('h1').first().text() || '').trim();
|
|
878
|
+
const titleAndH1 = `${titleText} ${h1Text}`;
|
|
879
|
+
const GUIDE_TITLE_RE = /\b(?:complete|ultimate|definitive|comprehensive)?\s*guide\b/i;
|
|
880
|
+
const EVERYTHING_RE = /everything you need/i;
|
|
881
|
+
const HOW_TO_TITLE_RE = /how to/i;
|
|
882
|
+
const STEP_BY_STEP_RE = /step[- ]by[- ]step/i;
|
|
883
|
+
const matchesGuideTitle = GUIDE_TITLE_RE.test(titleAndH1)
|
|
884
|
+
|| EVERYTHING_RE.test(titleAndH1)
|
|
885
|
+
|| HOW_TO_TITLE_RE.test(titleAndH1)
|
|
886
|
+
|| STEP_BY_STEP_RE.test(titleAndH1);
|
|
887
|
+
|
|
888
|
+
// Definition-list + multiple H2 sections is a strong guide signal.
|
|
889
|
+
const hasDefinitionListGuide = $('dl').length > 0 && h2Count >= 2;
|
|
890
|
+
|
|
891
|
+
// Long-form / heading-rich pages should never classify as pure FAQ.
|
|
892
|
+
const tooLongForFaq = wordCountForType > 2000;
|
|
893
|
+
const tooManyH2sForFaq = h2Count > 8;
|
|
894
|
+
|
|
895
|
+
if (matchesGuideTitle || hasDefinitionListGuide) return 'article';
|
|
896
|
+
if (schemaTypes.has('FAQPage') && isDominantlyFaq && !hasNonFaqGuideType
|
|
897
|
+
&& wordCountForType <= 1500 && !tooManyH2sForFaq && !tooLongForFaq) return 'faq';
|
|
776
898
|
if (['Article', 'NewsArticle', 'BlogPosting', 'TechArticle'].some((t) => schemaTypes.has(t))) return 'article';
|
|
777
|
-
// FAQPage schema present but page also
|
|
778
|
-
|
|
779
|
-
if (schemaTypes.has('FAQPage')) return '
|
|
899
|
+
// FAQPage schema present but page is also long-form or carries another guide-type schema:
|
|
900
|
+
// treat as article so guide-style word/heading expectations apply.
|
|
901
|
+
if (schemaTypes.has('FAQPage') && (hasNonFaqGuideType || wordCountForType > 1500 || h2Count >= 6 || tooManyH2sForFaq || tooLongForFaq)) return 'article';
|
|
902
|
+
if (schemaTypes.has('FAQPage') && !tooManyH2sForFaq && !tooLongForFaq) return 'faq';
|
|
903
|
+
if (schemaTypes.has('FAQPage')) return 'article';
|
|
780
904
|
if (['Product', 'Offer'].some((t) => schemaTypes.has(t))) return 'product';
|
|
781
905
|
if (['LocalBusiness', 'Restaurant', 'Store'].some((t) => schemaTypes.has(t))) return 'local-business';
|
|
782
906
|
|
|
@@ -908,7 +1032,9 @@ function checkStructuredData($, pageType, jsonLdData, jsonLdValid, jsonLdInvalid
|
|
|
908
1032
|
checks.push({ status: 'pass', label: `GEO-critical schema types present (${foundImportant.length})`, detail: foundImportant.join(', ') });
|
|
909
1033
|
} else if (foundImportant.length > 0) {
|
|
910
1034
|
score += 5;
|
|
911
|
-
|
|
1035
|
+
const suggestions = ['FAQPage', 'HowTo', 'Article', 'BreadcrumbList'].filter((t) => !schemaTypes.has(t));
|
|
1036
|
+
const consider = suggestions.length > 0 ? `. Consider adding: ${suggestions.join(', ')}` : '';
|
|
1037
|
+
checks.push({ status: 'warn', label: `Only ${foundImportant.length} GEO-critical schema type(s)`, detail: `Found: ${foundImportant.join(', ')}${consider}` });
|
|
912
1038
|
} else {
|
|
913
1039
|
checks.push({ status: 'fail', label: 'No GEO-critical schema types', detail: 'Add FAQPage, Article, Organization, BreadcrumbList, etc.' });
|
|
914
1040
|
}
|
|
@@ -1853,38 +1979,66 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
|
|
|
1853
1979
|
checks.push({ status: 'pass', label: 'No restrictive robots meta', detail: 'Page is open for indexing' });
|
|
1854
1980
|
}
|
|
1855
1981
|
|
|
1856
|
-
// Check for specific AI bot meta tags
|
|
1857
|
-
|
|
1858
|
-
|
|
1982
|
+
// Check for specific AI bot meta tags. Split blocked bots into training-only
|
|
1983
|
+
// (informational) vs citation crawlers (real penalty) so a noindex on GPTBot
|
|
1984
|
+
// is not weighted the same as a noindex on Googlebot.
|
|
1985
|
+
const trainingBotMeta = TRAINING_CRAWLERS.map(c => c.toLowerCase());
|
|
1986
|
+
const citationBotMeta = CITATION_CRAWLERS.map(c => c.toLowerCase()).concat(['claude-web']);
|
|
1987
|
+
const aiBotMeta = [...new Set([...trainingBotMeta, ...citationBotMeta])];
|
|
1988
|
+
const blockedTrainingBots = [];
|
|
1989
|
+
const blockedCitationBots = [];
|
|
1859
1990
|
aiBotMeta.forEach((bot) => {
|
|
1860
1991
|
const content = $(`meta[name="${bot}"]`).attr('content') || '';
|
|
1861
1992
|
if (content.includes('noindex')) {
|
|
1862
|
-
|
|
1993
|
+
if (citationBotMeta.includes(bot)) {
|
|
1994
|
+
blockedCitationBots.push(bot);
|
|
1995
|
+
} else {
|
|
1996
|
+
blockedTrainingBots.push(bot);
|
|
1997
|
+
}
|
|
1863
1998
|
}
|
|
1864
1999
|
});
|
|
1865
2000
|
|
|
1866
2001
|
maxScore += 15;
|
|
1867
|
-
if (
|
|
2002
|
+
if (blockedCitationBots.length === 0 && blockedTrainingBots.length === 0) {
|
|
1868
2003
|
score += 15;
|
|
1869
2004
|
checks.push({ status: 'pass', label: 'No AI bot restrictions in meta', detail: 'No specific bot blocking detected in page HTML' });
|
|
2005
|
+
} else if (blockedCitationBots.length === 0) {
|
|
2006
|
+
score += 15;
|
|
2007
|
+
checks.push({ status: 'info', label: `Training crawler meta blocks: ${blockedTrainingBots.join(', ')}`, detail: 'Training-only blocks do not affect AI citation visibility', found: blockedTrainingBots });
|
|
1870
2008
|
} else {
|
|
1871
|
-
|
|
2009
|
+
score += Math.max(0, 15 - blockedCitationBots.length * 3);
|
|
2010
|
+
checks.push({ status: 'warn', label: `Citation crawler meta blocks: ${blockedCitationBots.join(', ')}`, detail: 'These citation crawlers are blocked via meta tags', found: blockedCitationBots });
|
|
2011
|
+
if (blockedTrainingBots.length > 0) {
|
|
2012
|
+
checks.push({ status: 'info', label: `Training crawler meta blocks: ${blockedTrainingBots.join(', ')}`, detail: 'Training-only blocks are informational', found: blockedTrainingBots });
|
|
2013
|
+
}
|
|
1872
2014
|
}
|
|
1873
2015
|
|
|
1874
2016
|
// robots.txt integration (from server-side fetch)
|
|
1875
2017
|
if (robotsTxtData) {
|
|
1876
2018
|
maxScore += 10;
|
|
1877
2019
|
if (robotsTxtData.exists) {
|
|
1878
|
-
const
|
|
1879
|
-
|
|
2020
|
+
const blocks = robotsTxtData.blocksCrawlers || {};
|
|
2021
|
+
const trainingLowercase = new Set(TRAINING_CRAWLERS.map(c => c.toLowerCase()));
|
|
2022
|
+
const citationLowercase = new Set(CITATION_CRAWLERS.map(c => c.toLowerCase()));
|
|
2023
|
+
const blockedAll = Object.entries(blocks).filter(([, v]) => v).map(([k]) => k);
|
|
2024
|
+
const blockedTraining = blockedAll.filter(k => trainingLowercase.has(k.toLowerCase()));
|
|
2025
|
+
const blockedCitation = blockedAll.filter(k => citationLowercase.has(k.toLowerCase()));
|
|
2026
|
+
|
|
2027
|
+
if (blockedCitation.length === 0 && blockedTraining.length === 0) {
|
|
2028
|
+
score += 10;
|
|
2029
|
+
checks.push({ status: 'pass', label: 'robots.txt: no AI crawlers blocked', detail: 'All known training and citation crawlers are allowed' });
|
|
2030
|
+
} else if (blockedCitation.length === 0) {
|
|
1880
2031
|
score += 10;
|
|
1881
|
-
checks.push({ status: '
|
|
2032
|
+
checks.push({ status: 'info', label: `robots.txt: ${blockedTraining.length} training crawler(s) blocked, citation crawlers allowed`, detail: 'Training-only blocks do not affect AI citation visibility', found: blockedTraining });
|
|
1882
2033
|
} else {
|
|
1883
|
-
score += Math.max(0, 10 -
|
|
1884
|
-
checks.push({ status: 'warn', label: `robots.txt: ${
|
|
2034
|
+
score += Math.max(0, 10 - blockedCitation.length * 2);
|
|
2035
|
+
checks.push({ status: 'warn', label: `robots.txt: ${blockedCitation.length} citation crawler(s) blocked`, detail: 'Blocking citation crawlers prevents inline AI citations', found: blockedCitation });
|
|
2036
|
+
if (blockedTraining.length > 0) {
|
|
2037
|
+
checks.push({ status: 'info', label: `robots.txt: ${blockedTraining.length} training crawler(s) blocked`, detail: 'Training-only blocks are informational and do not affect AI citation visibility', found: blockedTraining });
|
|
2038
|
+
}
|
|
1885
2039
|
}
|
|
1886
2040
|
if (robotsTxtData.hasWildcardDisallow) {
|
|
1887
|
-
checks.push({ status: 'warn', label: 'robots.txt: wildcard Disallow: /', detail: 'All crawlers are blocked by default
|
|
2041
|
+
checks.push({ status: 'warn', label: 'robots.txt: wildcard Disallow: /', detail: 'All crawlers are blocked by default, only overridden by specific Allow rules' });
|
|
1888
2042
|
}
|
|
1889
2043
|
} else {
|
|
1890
2044
|
checks.push({ status: 'warn', label: 'No robots.txt found', detail: 'robots.txt helps control crawler access' });
|
|
@@ -4036,24 +4190,68 @@ function checkContentFreshness($, jsonLdData) {
|
|
|
4036
4190
|
}
|
|
4037
4191
|
|
|
4038
4192
|
// 12d. Copyright Year & Footer Freshness (10 pts)
|
|
4039
|
-
// Year ranges ("
|
|
4040
|
-
// year as the freshness signal, not the founding year.
|
|
4193
|
+
// Year ranges ("(c) 1997 - 2026") signal a founding year + current year; take
|
|
4194
|
+
// the END year as the freshness signal, not the founding year.
|
|
4195
|
+
// Also handles enumerated lists like "(c) 2010, 2015, 2026" by taking the max
|
|
4196
|
+
// of all years on the same line as a copyright marker.
|
|
4041
4197
|
const footerEl = $('footer');
|
|
4042
4198
|
maxScore += 10;
|
|
4043
4199
|
if (footerEl.length > 0) {
|
|
4044
|
-
|
|
4045
|
-
const
|
|
4046
|
-
const
|
|
4047
|
-
|
|
4048
|
-
|
|
4049
|
-
|
|
4200
|
+
// Strip "All Rights Reserved" boilerplate (en/fr/de) before parsing.
|
|
4201
|
+
const rawFooterText = footerEl.text();
|
|
4202
|
+
const footerText = rawFooterText
|
|
4203
|
+
.replace(/all\s+rights\s+reserved/gi, '')
|
|
4204
|
+
.replace(/tous\s+droits\s+r[ée]serv[ée]s/gi, '')
|
|
4205
|
+
.replace(/alle\s+rechte\s+vorbehalten/gi, '');
|
|
4206
|
+
// Broader prefix list: includes bracket variants and "Copyright ©" double prefix.
|
|
4207
|
+
const COPYRIGHT_PREFIX = /(?:©|\(c\)|\(C\)|\[c\]|\[C\]|©|copyright(?:\s*©)?)/i;
|
|
4208
|
+
// Exclude founding year markers so "Est. 1998" / "Since 2001" do not get
|
|
4209
|
+
// mistaken for a copyright year when no actual copyright marker is present.
|
|
4210
|
+
const FOUNDING_PREFIX = /\b(?:est(?:ablished|\.)?|since|founded(?:\s+in)?)\s+\d{4}\b/i;
|
|
4211
|
+
let copyrightYear = null;
|
|
4212
|
+
// Sweep each line for a copyright marker; take the max year found on that line.
|
|
4213
|
+
const lines = footerText.split(/\r?\n|<br\s*\/?>/i);
|
|
4214
|
+
for (const rawLine of lines) {
|
|
4215
|
+
const line = rawLine.trim();
|
|
4216
|
+
if (!line) continue;
|
|
4217
|
+
if (!COPYRIGHT_PREFIX.test(line)) continue;
|
|
4218
|
+
// Skip lines that look like founding-year statements without a real © marker.
|
|
4219
|
+
const hasRealMarker = /(?:©|\(c\)|\(C\)|\[c\]|\[C\]|©|copyright)/i.test(line);
|
|
4220
|
+
if (!hasRealMarker && FOUNDING_PREFIX.test(line)) continue;
|
|
4221
|
+
const yearMatches = line.match(/\b(19|20)\d{2}\b/g);
|
|
4222
|
+
if (yearMatches && yearMatches.length > 0) {
|
|
4223
|
+
const maxYear = Math.max(...yearMatches.map(y => parseInt(y, 10)));
|
|
4224
|
+
if (copyrightYear === null || maxYear > copyrightYear) copyrightYear = maxYear;
|
|
4225
|
+
}
|
|
4226
|
+
}
|
|
4227
|
+
// Fallback: if the footer is a single blob without line breaks, sweep the
|
|
4228
|
+
// whole text but only when a copyright marker exists.
|
|
4229
|
+
if (copyrightYear === null && COPYRIGHT_PREFIX.test(footerText)) {
|
|
4230
|
+
const yearMatches = footerText.match(/\b(19|20)\d{2}\b/g);
|
|
4231
|
+
if (yearMatches && yearMatches.length > 0) {
|
|
4232
|
+
copyrightYear = Math.max(...yearMatches.map(y => parseInt(y, 10)));
|
|
4233
|
+
}
|
|
4234
|
+
}
|
|
4235
|
+
// Supplemental freshness signal: <time datetime="YYYY"> inside <footer>.
|
|
4236
|
+
if (copyrightYear === null) {
|
|
4237
|
+
footerEl.find('time[datetime]').each((_i, tEl) => {
|
|
4238
|
+
const dt = ($(tEl).attr('datetime') || '').trim();
|
|
4239
|
+
const ym = dt.match(/^(\d{4})/);
|
|
4240
|
+
if (ym) {
|
|
4241
|
+
const ty = parseInt(ym[1], 10);
|
|
4242
|
+
if (copyrightYear === null || ty > copyrightYear) copyrightYear = ty;
|
|
4243
|
+
}
|
|
4244
|
+
});
|
|
4245
|
+
}
|
|
4246
|
+
if (copyrightYear !== null) {
|
|
4247
|
+
if (copyrightYear >= currentYear - 1) {
|
|
4050
4248
|
score += 10;
|
|
4051
4249
|
checks.push({ status: 'pass', label: `Copyright year current (${copyrightYear})`, detail: `Footer copyright is ${copyrightYear}` });
|
|
4052
|
-
} else if (copyrightYear === currentYear -
|
|
4250
|
+
} else if (copyrightYear === currentYear - 2) {
|
|
4053
4251
|
score += 5;
|
|
4054
|
-
checks.push({ status: 'warn', label: `Copyright year slightly old (${copyrightYear})`, detail: `Footer shows ${copyrightYear}
|
|
4252
|
+
checks.push({ status: 'warn', label: `Copyright year slightly old (${copyrightYear})`, detail: `Footer shows ${copyrightYear}, update to ${currentYear}` });
|
|
4055
4253
|
} else {
|
|
4056
|
-
checks.push({ status: 'fail', label: `Copyright year outdated (${copyrightYear})`, detail: `Footer shows ${copyrightYear}
|
|
4254
|
+
checks.push({ status: 'fail', label: `Copyright year outdated (${copyrightYear})`, detail: `Footer shows ${copyrightYear}, update to ${currentYear}` });
|
|
4057
4255
|
}
|
|
4058
4256
|
} else {
|
|
4059
4257
|
checks.push({ status: 'info', label: 'No copyright year in footer', detail: 'Add a copyright year to signal maintenance' });
|
|
@@ -4292,6 +4490,18 @@ function checkVerifiability($, domain) {
|
|
|
4292
4490
|
const contentText = (mainEl.length > 0 ? mainEl.text() : $('body').text() || '').trim();
|
|
4293
4491
|
const sentences = contentText.split(/[.!?]+/).filter(s => s.trim().length > 10);
|
|
4294
4492
|
|
|
4493
|
+
// Visible body text (paragraphs, list items, blockquotes) for attribution
|
|
4494
|
+
// patterns that often span sentence boundaries or live in elements that
|
|
4495
|
+
// are tricky to split on punctuation alone.
|
|
4496
|
+
const bodyTextEls = mainEl.length > 0
|
|
4497
|
+
? mainEl.find('p, li, blockquote, td, dd')
|
|
4498
|
+
: $('p, li, blockquote, td, dd');
|
|
4499
|
+
const bodyTextChunks = [];
|
|
4500
|
+
bodyTextEls.each((_i, el) => {
|
|
4501
|
+
const t = ($(el).text() || '').trim();
|
|
4502
|
+
if (t.length > 0) bodyTextChunks.push(t);
|
|
4503
|
+
});
|
|
4504
|
+
|
|
4295
4505
|
// 14a. External Citation Links (30 pts)
|
|
4296
4506
|
const AUTHORITY_DOMAINS = ['.gov', '.edu', '.org', 'scholar.google', 'pubmed', 'arxiv.org', 'doi.org'];
|
|
4297
4507
|
const externalLinks = mainEl.length > 0 ? mainEl.find('a[href^="http"]') : $('a[href^="http"]');
|
|
@@ -4312,7 +4522,7 @@ function checkVerifiability($, domain) {
|
|
|
4312
4522
|
checks.push({ status: 'pass', label: `Strong citations (${totalExternalLinks} external, ${authorityLinks} authority)`, detail: `${totalExternalLinks} external links including ${authorityLinks} authority sources` });
|
|
4313
4523
|
} else if (totalExternalLinks >= 1) {
|
|
4314
4524
|
score += 15;
|
|
4315
|
-
checks.push({ status: 'warn', label: `Some citations (${totalExternalLinks} external)`, detail: `${totalExternalLinks} external links
|
|
4525
|
+
checks.push({ status: 'warn', label: `Some citations (${totalExternalLinks} external)`, detail: `${totalExternalLinks} external links, add authority sources (.gov, .edu)` });
|
|
4316
4526
|
} else {
|
|
4317
4527
|
score += 5;
|
|
4318
4528
|
checks.push({ status: 'fail', label: 'No external citations', detail: 'Add external links to authoritative sources' });
|
|
@@ -4320,25 +4530,58 @@ function checkVerifiability($, domain) {
|
|
|
4320
4530
|
|
|
4321
4531
|
// 14b. Source Attribution in Text (25 pts)
|
|
4322
4532
|
const SOURCE_ATTRIBUTION_PATTERNS = [
|
|
4323
|
-
/\baccording to\s+[A-Z]
|
|
4324
|
-
/\
|
|
4325
|
-
/\b(
|
|
4326
|
-
/\b(
|
|
4327
|
-
/\b(
|
|
4533
|
+
/\baccording to\s+(?:the\s+|a\s+|an\s+)?[A-Z][\w'.-]*(?:\s+(?:of|for|on|and|the|de|van)\s+)?[A-Z\w'.-]*/,
|
|
4534
|
+
/\b(?:a|an|the|new|recent|latest|major|landmark)?\s*(?:study|report|survey|analysis|paper|whitepaper|brief)\s+(?:by|from|published by)\b/i,
|
|
4535
|
+
/\b(?:research|data|figures|statistics|findings)\s+(?:by|from|of|published by)\b/i,
|
|
4536
|
+
/\b(?:published in|cited in|reported by|noted by|observed by)\b/i,
|
|
4537
|
+
/\b(?:source|data from|based on)\s*:/i,
|
|
4538
|
+
/\b(?:report|study|analysis)\s+(?:by|from)\b/i,
|
|
4539
|
+
/\b[A-Z][\w'.-]+(?:\s+[A-Z][\w'.-]+){0,4}\s+(?:says|states|reports|found|concluded|notes|observed|estimates)\b/,
|
|
4328
4540
|
/\[\d+\]/,
|
|
4329
|
-
/\b(et al\.?|ibid\.?)\b/,
|
|
4541
|
+
/\b(?:et al\.?|ibid\.?)\b/,
|
|
4542
|
+
// Broadened patterns: "as reported by", "as documented in", etc.
|
|
4543
|
+
/\bas\s+(?:reported|noted|stated|cited|documented|shown|described|outlined)\s+(?:by|in|on)\b/i,
|
|
4544
|
+
// "per the WHO", "per CDC"
|
|
4545
|
+
/\bper\s+(?:the\s+)?[A-Z]/,
|
|
4546
|
+
// Possessive: "WHO's data", "CDC's findings"
|
|
4547
|
+
/\b[A-Z][A-Za-z.&'-]+(?:'s|’s)\s+(?:data|report|study|analysis|findings|guidance|recommendations|guidelines)\b/,
|
|
4548
|
+
// Parenthetical citation: "(source: ...)", "(via: ...)"
|
|
4549
|
+
/\((?:source|src|via|cf|see)\s*:\s*[^)]+\)/i,
|
|
4550
|
+
// DOI references
|
|
4551
|
+
/\bdoi:\s*10\.\d+/i,
|
|
4552
|
+
// Numeric brackets variants: "[1, 2]", "[1-3]"
|
|
4553
|
+
/\[\d+(?:[,-]\s*\d+)*\]/,
|
|
4554
|
+
// Author-year: "(Smith, 2023)", "(Smith et al., 2023)", "(Smith and Jones, 2023)"
|
|
4555
|
+
/\([A-Z][a-zA-Z]+(?:\s+(?:et\s+al\.?|and\s+[A-Z][a-zA-Z]+))?,\s*\d{4}[a-z]?\)/,
|
|
4556
|
+
// "<Org> data shows/reveals/indicates/suggests/confirms"
|
|
4557
|
+
/\b[A-Z][\w'.-]+(?:\s+[A-Z][\w'.-]+){0,3}\s+data\s+(?:shows|reveals|indicates|suggests|confirms)\b/,
|
|
4558
|
+
// "<Org> figures/findings show/reveal/indicate"
|
|
4559
|
+
/\b[A-Z][\w'.-]+(?:\s+[A-Z][\w'.-]+){0,3}\s+(?:figures|findings)\s+(?:show|reveal|indicate)\b/,
|
|
4560
|
+
// "in a recent study", "in a landmark report"
|
|
4561
|
+
/\bin\s+(?:a|an)\s+(?:recent|new|landmark|seminal)\s+(?:study|report|survey|paper|analysis)\b/i,
|
|
4562
|
+
// "verified by", "confirmed by", "documented in/by"
|
|
4563
|
+
/\b(?:verified|confirmed)\s+by\b/i,
|
|
4564
|
+
/\bdocumented\s+(?:in|by)\b/i,
|
|
4565
|
+
// Government/regulatory bodies: "Department of Health", "Centers for Disease Control"
|
|
4566
|
+
/\b(?:U\.?S\.?\s+)?(?:Department\s+of|Ministry\s+of|Office\s+of|Bureau\s+of|Centers\s+for|Federal|National|Royal)\s+[A-Z]/,
|
|
4330
4567
|
];
|
|
4331
4568
|
let attrCount = 0;
|
|
4332
4569
|
sentences.forEach(s => {
|
|
4333
4570
|
if (SOURCE_ATTRIBUTION_PATTERNS.some(p => p.test(s))) attrCount++;
|
|
4334
4571
|
});
|
|
4572
|
+
bodyTextChunks.forEach(t => {
|
|
4573
|
+
if (SOURCE_ATTRIBUTION_PATTERNS.some(p => p.test(t))) attrCount++;
|
|
4574
|
+
});
|
|
4335
4575
|
maxScore += 25;
|
|
4336
4576
|
if (attrCount >= 3) {
|
|
4337
4577
|
score += 25;
|
|
4338
4578
|
checks.push({ status: 'pass', label: `Strong source attribution (${attrCount})`, detail: `${attrCount} source attribution patterns detected` });
|
|
4579
|
+
} else if (attrCount >= 2) {
|
|
4580
|
+
score += 18;
|
|
4581
|
+
checks.push({ status: 'pass', label: `Source attribution found (${attrCount})`, detail: `${attrCount} attribution patterns detected` });
|
|
4339
4582
|
} else if (attrCount >= 1) {
|
|
4340
|
-
score +=
|
|
4341
|
-
checks.push({ status: 'warn', label: `Some source attribution (${attrCount})`, detail: `${attrCount} attribution(s)
|
|
4583
|
+
score += 10;
|
|
4584
|
+
checks.push({ status: 'warn', label: `Some source attribution (${attrCount})`, detail: `${attrCount} attribution(s), add more source references` });
|
|
4342
4585
|
} else {
|
|
4343
4586
|
score += 5;
|
|
4344
4587
|
checks.push({ status: 'info', label: 'No source attribution detected', detail: 'Add "according to", "study by", or citation markers' });
|
|
@@ -4577,6 +4820,48 @@ function checkMultimodal($, jsonLdData) {
|
|
|
4577
4820
|
}
|
|
4578
4821
|
|
|
4579
4822
|
// 16b. Figure/Figcaption Usage (25 pts)
|
|
4823
|
+
// Only evaluate coverage against content images. Decorative images (empty
|
|
4824
|
+
// alt, presentation role, callouts, headshots, seals, logos, icons, small
|
|
4825
|
+
// images, content nested in <aside>) are excluded from the denominator.
|
|
4826
|
+
const DECORATIVE_CLASS_HINTS = /(callout|note|highlight|decorative|icon|headshot|avatar|seal|logo|badge|sidebar|bullet|arrow|divider|separator|spacer|pixel|tracking|analytics|placeholder|flag|star|rating)/i;
|
|
4827
|
+
// Filename-style alt text like "img-23.jpg" / "photo.png" indicates a non-descriptive alt.
|
|
4828
|
+
const FILENAME_ALT_RE = /^(?:img|image|photo|picture)?[-_ ]?\d*\.(?:jpg|jpeg|png|gif|svg|webp)$/i;
|
|
4829
|
+
// Tracking pixel hints in src.
|
|
4830
|
+
const TRACKING_SRC_RE = /(?:pixel|beacon|track|analytics)/i;
|
|
4831
|
+
/**
 * Report whether an image should be treated as decorative and therefore left
 * out of the content-image count used for figure/figcaption coverage.
 *
 * An image is decorative when any of the following holds:
 *   - role is "presentation" or "none", aria-hidden is "true", or
 *     data-decorative is "true";
 *   - alt is present but empty, or looks like a bare filename;
 *   - it sits inside <aside>, page chrome (header/nav/footer/button and the
 *     matching ARIA landmark roles) or an ad/banner container;
 *   - its own class, or an ancestor's class, carries a decorative hint;
 *   - its width or height attribute is a pixel value from 1 to 100;
 *   - its src matches a tracking-pixel hint.
 *
 * Uses `$`, DECORATIVE_CLASS_HINTS, FILENAME_ALT_RE and TRACKING_SRC_RE from
 * the enclosing scope.
 *
 * @param {*} imgEl - Image element (anything `$()` accepts).
 * @returns {boolean} true when the image is decorative.
 */
function isDecorativeImage(imgEl) {
  const $img = $(imgEl);

  const role = ($img.attr('role') || '').toLowerCase();
  if (role === 'presentation' || role === 'none') return true;

  // Explicit decorative attributes.
  const ariaHidden = ($img.attr('aria-hidden') || '').toLowerCase();
  if (ariaHidden === 'true') return true;
  const dataDecorative = ($img.attr('data-decorative') || '').toLowerCase();
  if (dataDecorative === 'true') return true;

  // A missing alt is NOT decorative; only an explicitly empty one is.
  const alt = $img.attr('alt');
  if (alt !== undefined && alt.trim() === '') return true;
  // Filename-style alt text is non-descriptive and treated as decorative.
  if (alt !== undefined && FILENAME_ALT_RE.test(alt.trim())) return true;

  if ($img.closest('aside').length > 0) return true;
  // Broader ancestor selectors: chrome regions and ad/banner containers.
  if ($img.closest('header, nav, footer, button, [role="banner"], [role="navigation"], [role="contentinfo"], .ad, .advertisement, .banner').length > 0) return true;

  const cls = $img.attr('class') || '';
  if (DECORATIVE_CLASS_HINTS.test(cls)) return true;
  if ($img.closest('[class*="callout"], [class*="note"], [class*="highlight"], [class*="decorative"], [class*="seal"], [class*="logo"], [class*="headshot"], [class*="avatar"], [class*="icon"]').length > 0) return true;

  // Percentages such as width="100%" are relative to the container, not a
  // pixel size. parseInt would read "100%" as 100 and misclassify a
  // full-width content image as a small one, so percentage values are ignored.
  const pixelDimension = (name) => {
    const raw = String($img.attr(name) || '').trim();
    return raw.includes('%') ? NaN : parseInt(raw, 10);
  };
  const w = pixelDimension('width');
  const h = pixelDimension('height');
  // 1..100px on either axis: tracking pixels (1x1, 1xN, Nx1), icons and
  // other small images.
  if (Number.isFinite(w) && w > 0 && w <= 100) return true;
  if (Number.isFinite(h) && h > 0 && h <= 100) return true;

  const src = $img.attr('src') || '';
  if (src && TRACKING_SRC_RE.test(src)) return true;

  return false;
}
|
|
4860
|
+
let contentImageCount = 0;
|
|
4861
|
+
fallbackImages.each((_i, imgEl) => {
|
|
4862
|
+
if (!isDecorativeImage(imgEl)) contentImageCount++;
|
|
4863
|
+
});
|
|
4864
|
+
|
|
4580
4865
|
const mainFigures = $('main figure, article figure, [role="main"] figure');
|
|
4581
4866
|
const fallbackFigures = mainFigures.length > 0 ? mainFigures : $('figure');
|
|
4582
4867
|
let figuresWithCaption = 0;
|
|
@@ -4587,16 +4872,19 @@ function checkMultimodal($, jsonLdData) {
|
|
|
4587
4872
|
if (fallbackImages.length === 0) {
|
|
4588
4873
|
score += 25;
|
|
4589
4874
|
checks.push({ status: 'info', label: 'No images for figure evaluation', detail: 'No images found on page' });
|
|
4875
|
+
} else if (contentImageCount === 0) {
|
|
4876
|
+
score += 25;
|
|
4877
|
+
checks.push({ status: 'info', label: 'Only decorative images detected', detail: 'No content images require figure/figcaption markup' });
|
|
4590
4878
|
} else {
|
|
4591
|
-
const figPct =
|
|
4592
|
-
if (figPct
|
|
4879
|
+
const figPct = Math.round((figuresWithCaption / contentImageCount) * 100);
|
|
4880
|
+
if (figPct >= 50) {
|
|
4593
4881
|
score += 25;
|
|
4594
|
-
checks.push({ status: 'pass', label: `Good figure/caption usage (${figPct}%)`, detail: `${
|
|
4882
|
+
checks.push({ status: 'pass', label: `Good figure/caption usage (${figPct}%)`, detail: `${figuresWithCaption} of ${contentImageCount} content images wrapped in <figure> with <figcaption>` });
|
|
4595
4883
|
} else if (figuresWithCaption > 0) {
|
|
4596
4884
|
score += 12;
|
|
4597
|
-
checks.push({ status: 'warn', label: 'Some figure/caption usage', detail:
|
|
4885
|
+
checks.push({ status: 'warn', label: 'Some figure/caption usage', detail: `${figuresWithCaption} of ${contentImageCount} content images wrapped, extend to remaining content images` });
|
|
4598
4886
|
} else {
|
|
4599
|
-
checks.push({ status: 'info', label: 'No figure/caption usage', detail: 'Wrap images in <figure> with <figcaption> for better context' });
|
|
4887
|
+
checks.push({ status: 'info', label: 'No figure/caption usage', detail: 'Wrap content images in <figure> with <figcaption> for better context' });
|
|
4600
4888
|
}
|
|
4601
4889
|
}
|
|
4602
4890
|
|
|
@@ -5299,11 +5587,13 @@ function calculateGeoScore(data) {
|
|
|
5299
5587
|
total += robotsScore;
|
|
5300
5588
|
maxPossible += 5;
|
|
5301
5589
|
|
|
5302
|
-
// 2. AI crawlers NOT blocked
|
|
5590
|
+
// 2. AI crawlers NOT blocked. Only citation crawlers (real impact on AI
|
|
5591
|
+
// visibility) contribute to the score. Training-crawler blocks are reported
|
|
5592
|
+
// in the detail string for transparency but do not deduct points.
|
|
5303
5593
|
let crawlerScore = 0;
|
|
5304
5594
|
const blocked = data.robotsTxt.blocksCrawlers || {};
|
|
5305
5595
|
const crawlerDetails = [];
|
|
5306
|
-
for (const crawler of
|
|
5596
|
+
for (const crawler of CITATION_CRAWLERS) {
|
|
5307
5597
|
if (blocked[crawler] === false || blocked[crawler] === undefined) {
|
|
5308
5598
|
crawlerScore += 1;
|
|
5309
5599
|
crawlerDetails.push(`${crawler}: allowed`);
|
|
@@ -5311,9 +5601,13 @@ function calculateGeoScore(data) {
|
|
|
5311
5601
|
crawlerDetails.push(`${crawler}: BLOCKED`);
|
|
5312
5602
|
}
|
|
5313
5603
|
}
|
|
5314
|
-
|
|
5604
|
+
for (const crawler of TRAINING_CRAWLERS) {
|
|
5605
|
+
const status = (blocked[crawler] === false || blocked[crawler] === undefined) ? 'allowed' : 'blocked (training-only, informational)';
|
|
5606
|
+
crawlerDetails.push(`${crawler}: ${status}`);
|
|
5607
|
+
}
|
|
5608
|
+
breakdown.aiCrawlerAccess = { score: crawlerScore, max: CITATION_CRAWLERS.length, detail: crawlerDetails.join('; ') };
|
|
5315
5609
|
total += crawlerScore;
|
|
5316
|
-
maxPossible +=
|
|
5610
|
+
maxPossible += CITATION_CRAWLERS.length;
|
|
5317
5611
|
|
|
5318
5612
|
// 3. llms.txt exists (10 pts)
|
|
5319
5613
|
const llmsScore = data.llmsTxt.exists ? 10 : 0;
|
package/src/index.js
CHANGED
|
@@ -1609,7 +1609,7 @@ function bulkHTMLScript() {
|
|
|
1609
1609
|
|
|
1610
1610
|
const server = new McpServer({
|
|
1611
1611
|
name: "glippy-geo",
|
|
1612
|
-
version: "0.
|
|
1612
|
+
version: "0.3.3",
|
|
1613
1613
|
});
|
|
1614
1614
|
|
|
1615
1615
|
// ---------------------------------------------------------------------------
|
|
@@ -2137,15 +2137,16 @@ server.tool(
|
|
|
2137
2137
|
"Analyse multiple domains in parallel and compare their GEO scores side by side. " +
|
|
2138
2138
|
"Returns a comparison table with overall scores, per-category breakdowns, and a ranked summary. " +
|
|
2139
2139
|
"Useful for competitive analysis or auditing a portfolio of sites. " +
|
|
2140
|
+
"Accepts up to 50 domains per call - for larger lists, split them across multiple runs and merge the results. " +
|
|
2140
2141
|
"Requires Pro or Agency tier. " +
|
|
2141
2142
|
"Use output_format='json' to get raw results that can be passed to export_bulk_report.",
|
|
2142
2143
|
{
|
|
2143
2144
|
domains: z
|
|
2144
2145
|
.array(z.string())
|
|
2145
2146
|
.min(2)
|
|
2146
|
-
.max(
|
|
2147
|
+
.max(50, "compare_domains accepts at most 50 domains per call. Split larger lists across multiple runs and merge the results.")
|
|
2147
2148
|
.describe(
|
|
2148
|
-
'List of domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix.'
|
|
2149
|
+
'List of 2-50 domains to compare, e.g. ["example.com", "competitor.com"]. Do not include https:// prefix. For more than 50 domains, run multiple times and combine the output.'
|
|
2149
2150
|
),
|
|
2150
2151
|
max_pages: z
|
|
2151
2152
|
.number()
|
|
@@ -2178,15 +2179,22 @@ server.tool(
|
|
|
2178
2179
|
const maxPages = max_pages ?? 10;
|
|
2179
2180
|
const renderMode = render_mode ?? "static";
|
|
2180
2181
|
|
|
2181
|
-
//
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2182
|
+
// Cap concurrent domain analyses so a 50-domain × 10-page run does not
|
|
2183
|
+
// fan out into 500 simultaneous fetches.
|
|
2184
|
+
const DOMAIN_CONCURRENCY = 10;
|
|
2185
|
+
const results = [];
|
|
2186
|
+
for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
|
|
2187
|
+
const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
|
|
2188
|
+
const batchResults = await Promise.allSettled(
|
|
2189
|
+
batch.map((domain) =>
|
|
2190
|
+
checkGEO(domain, { maxPages, renderMode }).then((result) => ({
|
|
2191
|
+
domain,
|
|
2192
|
+
result,
|
|
2193
|
+
}))
|
|
2194
|
+
)
|
|
2195
|
+
);
|
|
2196
|
+
results.push(...batchResults);
|
|
2197
|
+
}
|
|
2190
2198
|
|
|
2191
2199
|
// JSON output mode - return raw results for use with export_bulk_report
|
|
2192
2200
|
if (output_format === "json") {
|
|
@@ -3085,10 +3093,10 @@ server.tool(
|
|
|
3085
3093
|
domains: z
|
|
3086
3094
|
.array(z.string())
|
|
3087
3095
|
.min(2)
|
|
3088
|
-
.max(
|
|
3096
|
+
.max(50, "export_bulk_report accepts at most 50 domains per call. Split larger lists across multiple runs.")
|
|
3089
3097
|
.optional()
|
|
3090
3098
|
.describe(
|
|
3091
|
-
'Compare
|
|
3099
|
+
'Compare 2-50 domains. E.g. ["example.com", "competitor.com"]. Do not include https://. For more than 50, run multiple times.'
|
|
3092
3100
|
),
|
|
3093
3101
|
urls: z
|
|
3094
3102
|
.array(z.string())
|
|
@@ -3258,14 +3266,20 @@ server.tool(
|
|
|
3258
3266
|
// ------------------------------------------------------------------
|
|
3259
3267
|
if (domains) {
|
|
3260
3268
|
const maxPages = max_pages ?? 10;
|
|
3261
|
-
const
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
+
const DOMAIN_CONCURRENCY = 10;
|
|
3270
|
+
const results = [];
|
|
3271
|
+
for (let i = 0; i < domains.length; i += DOMAIN_CONCURRENCY) {
|
|
3272
|
+
const batch = domains.slice(i, i + DOMAIN_CONCURRENCY);
|
|
3273
|
+
const batchResults = await Promise.allSettled(
|
|
3274
|
+
batch.map((domain) =>
|
|
3275
|
+
checkGEO(domain, { maxPages, renderMode }).then((result) => ({
|
|
3276
|
+
domain,
|
|
3277
|
+
result,
|
|
3278
|
+
}))
|
|
3279
|
+
)
|
|
3280
|
+
);
|
|
3281
|
+
results.push(...batchResults);
|
|
3282
|
+
}
|
|
3269
3283
|
|
|
3270
3284
|
const entries = [];
|
|
3271
3285
|
for (const r of results) {
|