glippy-mcp 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -13
- package/package.json +2 -1
- package/src/chrome-fetcher.js +213 -0
- package/src/geo-checker.js +480 -30
- package/src/index.js +168 -24
package/src/geo-checker.js
CHANGED
|
@@ -9,6 +9,19 @@ import http from 'node:http';
|
|
|
9
9
|
import https from 'node:https';
|
|
10
10
|
import { URL } from 'node:url';
|
|
11
11
|
import * as cheerio from 'cheerio';
|
|
12
|
+
import { chromeFetch } from './chrome-fetcher.js';
|
|
13
|
+
|
|
14
|
+
// Status codes that indicate the server is refusing or stalling a bot-shaped
// request rather than serving real content. 202 (Amazon) and 400 (Douglas)
// sit here because in practice those are only returned to non-browser UAs.
const BOT_BLOCK_STATUS = new Set([202, 400, 401, 403, 407, 429, 503]);

/**
 * Decide whether a response body carries no usable content.
 *
 * @param {*} body - Response body (string, Buffer-like, or nullish).
 * @returns {boolean} True for null/undefined, whitespace-only strings,
 *   zero-length array-likes (e.g. an empty Buffer), and other falsy values.
 */
function isEmptyBody(body) {
  if (body == null) return true;
  // A body consisting solely of whitespace carries no more content than ''.
  if (typeof body === 'string') return body.trim() === '';
  // Empty Buffers / typed arrays are truthy objects, so check length explicitly.
  if (typeof body.length === 'number') return body.length === 0;
  return !body;
}

/**
 * Heuristically decide whether a fetch result looks like a bot block or stall
 * instead of a real page.
 *
 * @param {{statusCode: (number|null|undefined), body: *}|null|undefined} res
 *   Fetch result to inspect.
 * @returns {boolean} True when the result is missing, has no status code, has
 *   a status listed in BOT_BLOCK_STATUS, or is a 2xx response with an empty
 *   body; false otherwise.
 */
function looksBotBlocked(res) {
  if (!res) return true;
  const { statusCode, body } = res;
  if (statusCode == null) return true;
  if (BOT_BLOCK_STATUS.has(statusCode)) return true;
  // A "successful" response without content is treated as a soft block.
  const isSuccess = statusCode >= 200 && statusCode < 300;
  return isSuccess && isEmptyBody(body);
}
|
|
12
25
|
|
|
13
26
|
// ---------------------------------------------------------------------------
|
|
14
27
|
// Constants
|
|
@@ -750,8 +763,11 @@ function detectPageType($, schemaTypes, pathname) {
|
|
|
750
763
|
if (['Article', 'NewsArticle', 'BlogPosting', 'TechArticle'].some((t) => schemaTypes.has(t))) return 'article';
|
|
751
764
|
if (['LocalBusiness', 'Restaurant', 'Store'].some((t) => schemaTypes.has(t))) return 'local-business';
|
|
752
765
|
|
|
753
|
-
// Heuristic: homepage detection
|
|
754
|
-
|
|
766
|
+
// Heuristic: homepage detection (including language/locale-prefixed homepages like /en/, /de-DE/, /nl/)
|
|
767
|
+
// Strip a leading language or locale segment before checking so multilingual
|
|
768
|
+
// sites hosting their homepage at /en/ or /nl-NL/ are not treated as generic.
|
|
769
|
+
const normalizedPath = pathname.replace(/^\/[a-z]{2}(?:[-_][a-z]{2,3})?\/?$/i, '/');
|
|
770
|
+
if (normalizedPath === '/' || normalizedPath === '/index.html' || normalizedPath === '/index.php' || normalizedPath === '') return 'homepage';
|
|
755
771
|
|
|
756
772
|
// Heuristic: FAQ page via DOM
|
|
757
773
|
const faqIndicators = $('[class*="faq"], [id*="faq"], details, [class*="accordion"]');
|
|
@@ -1439,7 +1455,7 @@ function checkAccessibility($) {
|
|
|
1439
1455
|
const unlabeledInputList = [];
|
|
1440
1456
|
inputs.each((_, el) => {
|
|
1441
1457
|
const id = $(el).attr('id');
|
|
1442
|
-
const hasLabel = id && $(`label[for="${id}"]`).length > 0;
|
|
1458
|
+
const hasLabel = id && $(`label[for="${id.replace(/(["\\])/g, '\\$1')}"]`).length > 0;
|
|
1443
1459
|
const hasAriaLabel = $(el).attr('aria-label') || $(el).attr('aria-labelledby');
|
|
1444
1460
|
const wrappedInLabel = $(el).closest('label').length > 0;
|
|
1445
1461
|
const hasPlaceholder = $(el).attr('placeholder');
|
|
@@ -1885,6 +1901,63 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
|
|
|
1885
1901
|
return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Machine Readability' };
|
|
1886
1902
|
}
|
|
1887
1903
|
|
|
1904
|
+
// ---------------------------------------------------------------------------
|
|
1905
|
+
// Trust signal evidence extractor
|
|
1906
|
+
// ---------------------------------------------------------------------------
|
|
1907
|
+
|
|
1908
|
+
/**
 * Extract raw nav/header/footer links plus language signals. Hardcoded pattern
 * lists cannot keep up with ~100 languages and typos; instead we surface the
 * raw anchor text + href so the calling LLM (or downstream consumer) can
 * classify trust signals (about / contact / legal / imprint / cookies)
 * semantically in whatever language the site uses.
 *
 * Links are trimmed, whitespace-collapsed, de-duplicated on href + text, and
 * capped per location. Fragment-only and `javascript:` hrefs are skipped.
 * Hreflang values are trimmed and de-duplicated in document order.
 *
 * @param {cheerio.CheerioAPI} $
 * @returns {{
 *   htmlLang: string|null,
 *   hreflangs: string[],
 *   navLinks: Array<{href: string, text: string, rel: string|null}>,
 *   footerLinks: Array<{href: string, text: string, rel: string|null}>,
 * }}
 */
function extractTrustSignals($) {
  const PER_LOCATION_LIMIT = 80;
  const MAX_TEXT_LEN = 120;

  // Gather unique anchors found under any element matching `selector`.
  function collect(selector) {
    const out = [];
    const seen = new Set();
    $(selector).find('a[href]').each((_, el) => {
      // Returning false stops the iteration once the cap is reached.
      if (out.length >= PER_LOCATION_LIMIT) return false;
      const $el = $(el);
      const href = ($el.attr('href') || '').trim();
      if (!href || href.startsWith('#') || href.toLowerCase().startsWith('javascript:')) return undefined;
      const text = $el.text().trim().replace(/\s+/g, ' ').slice(0, MAX_TEXT_LEN);
      const key = `${href}|${text}`;
      if (seen.has(key)) return undefined;
      seen.add(key);
      out.push({ href, text, rel: $el.attr('rel') || null });
      return undefined;
    });
    return out;
  }

  const navLinks = collect('header, nav, [role="navigation"], [class*="menu" i], [class*="navigation" i], [id*="menu" i], [id*="nav" i]');
  const footerLinks = collect('footer, [role="contentinfo"], [class*="footer" i], [id*="footer" i]');

  // `rel` is a space-separated token list, so match the "alternate" token
  // (~=) rather than the whole attribute value; a Set keeps values unique
  // while preserving first-seen order.
  const hreflangSet = new Set();
  $('link[rel~="alternate"][hreflang]').each((_, el) => {
    const hl = ($(el).attr('hreflang') || '').trim();
    if (hl) hreflangSet.add(hl);
  });

  return {
    htmlLang: $('html').attr('lang') || null,
    hreflangs: [...hreflangSet],
    navLinks,
    footerLinks,
  };
}
|
|
1960
|
+
|
|
1888
1961
|
// ---------------------------------------------------------------------------
|
|
1889
1962
|
// CHECK CATEGORY 7: Entity & Authority
|
|
1890
1963
|
// ---------------------------------------------------------------------------
|
|
@@ -2464,20 +2537,133 @@ function checkEntity($, jsonLdData) {
|
|
|
2464
2537
|
checks.push({ status: 'info', label: 'No About/Contact page links detected', detail: 'Link to organizational info for E-E-A-T' });
|
|
2465
2538
|
}
|
|
2466
2539
|
|
|
2467
|
-
// Privacy / Terms links (trust signals)
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
const
|
|
2472
|
-
|
|
2473
|
-
|
|
2540
|
+
// Privacy / Terms / Imprint / Cookies links (trust signals, multi-language)
|
|
2541
|
+
// Hardcoded patterns are a fallback heuristic; the extractTrustSignals
|
|
2542
|
+
// evidence payload on the analysis result lets LLM callers reclassify
|
|
2543
|
+
// semantically in any language.
|
|
2544
|
+
const privacyPatterns = [
|
|
2545
|
+
// English
|
|
2546
|
+
'privacy', 'privacy-policy',
|
|
2547
|
+
// Latin-alphabet European languages
|
|
2548
|
+
'datenschutz', 'privatsphaere', 'privatsphare',
|
|
2549
|
+
'privacidad', 'politica-de-privacidad',
|
|
2550
|
+
'privacidade', 'politica-de-privacidade',
|
|
2551
|
+
'confidentialite', 'politique-de-confidentialite', 'vie-privee',
|
|
2552
|
+
'riservatezza', 'privacy-italia',
|
|
2553
|
+
'privacybeleid', 'privacyverklaring',
|
|
2554
|
+
'integritet', 'integritetspolicy',
|
|
2555
|
+
'personvern',
|
|
2556
|
+
'tietosuoja', 'yksityisyys',
|
|
2557
|
+
'persondata', 'fortrolighed',
|
|
2558
|
+
'adatvedelem',
|
|
2559
|
+
'prywatnosc', 'polityka-prywatnosci',
|
|
2560
|
+
'soukromi', 'ochrana-osobnich-udaju',
|
|
2561
|
+
'ochrana-osobnych-udajov',
|
|
2562
|
+
'confidentialitate',
|
|
2563
|
+
'poverljivost', 'privatnost',
|
|
2564
|
+
'zasebnost',
|
|
2565
|
+
'privatesia', 'privatnost-hr',
|
|
2566
|
+
'konfidentsialnost', 'privatnost-ba',
|
|
2567
|
+
'gizlilik',
|
|
2568
|
+
'privatumas', 'privatuma',
|
|
2569
|
+
'yasslilik',
|
|
2570
|
+
// Romanized non-Latin
|
|
2571
|
+
'konfidentsialnost', 'konfidentsialnost-ua', 'konfidentsialnist',
|
|
2572
|
+
'idiotikotita', 'aporrito', 'prostasia-dedomenon',
|
|
2573
|
+
'puraibashi', 'puraibasi-porisi',
|
|
2574
|
+
'geinsajeongbobo', 'gaeinjeongbo',
|
|
2575
|
+
'yinsi', 'yinsi-zhengce',
|
|
2576
|
+
'khasusiyat', 'khososi',
|
|
2577
|
+
'harimiyat',
|
|
2578
|
+
'niji-gopaniyata', 'gopaniyata',
|
|
2579
|
+
'gopniyata',
|
|
2580
|
+
'kerahasiaan', 'privasi',
|
|
2581
|
+
'quyen-rieng-tu', 'bao-mat',
|
|
2582
|
+
'khwam-pen-suanto', 'nayobai-khwampensuntu',
|
|
2583
|
+
];
|
|
2584
|
+
const termsPatterns = [
|
|
2585
|
+
// English
|
|
2586
|
+
'terms', 'terms-of-service', 'terms-of-use', 'terms-conditions', 'tos',
|
|
2587
|
+
// Latin-alphabet European languages
|
|
2588
|
+
'agb', 'nutzungsbedingungen', 'geschaeftsbedingungen',
|
|
2589
|
+
'condiciones', 'terminos', 'terminos-y-condiciones', 'condiciones-de-uso',
|
|
2590
|
+
'termos', 'termos-de-uso', 'termos-de-servico',
|
|
2591
|
+
'conditions-generales', 'cgu', 'cgv', 'mentions-contrat',
|
|
2592
|
+
'condizioni', 'termini', 'termini-e-condizioni',
|
|
2593
|
+
'voorwaarden', 'algemene-voorwaarden', 'gebruiksvoorwaarden',
|
|
2594
|
+
'villkor', 'anvandarvillkor', 'allmanna-villkor',
|
|
2595
|
+
'brukervilkar', 'vilkar',
|
|
2596
|
+
'kayttoehdot', 'ehdot',
|
|
2597
|
+
'betingelser', 'vilkaar', 'handelsbetingelser',
|
|
2598
|
+
'szerzodesi-feltetelek', 'felhasznalasi-feltetelek',
|
|
2599
|
+
'regulamin', 'warunki',
|
|
2600
|
+
'podminky', 'vseobecne-obchodni-podminky', 'obchodni-podminky',
|
|
2601
|
+
'obchodne-podmienky',
|
|
2602
|
+
'termeni-si-conditii', 'termeni',
|
|
2603
|
+
'uslovi', 'uvjeti', 'pogoji',
|
|
2604
|
+
'kosullar', 'kullanim-kosullari',
|
|
2605
|
+
'salygos', 'naudojimo-salygos',
|
|
2606
|
+
'noteikumi',
|
|
2607
|
+
'kasutustingimused',
|
|
2608
|
+
// Romanized non-Latin
|
|
2609
|
+
'usloviya', 'usloviya-ispolzovaniya', 'pravila',
|
|
2610
|
+
'umovy', 'pravyla',
|
|
2611
|
+
'oroi', 'oroi-xrisis',
|
|
2612
|
+
'riyoukiyaku', 'riyou-kiyaku', 'kiyaku',
|
|
2613
|
+
'iyong-yakgwan', 'yakgwan',
|
|
2614
|
+
'tiaokuan', 'fuwu-tiaokuan', 'shiyong-tiaokuan',
|
|
2615
|
+
'shuruth', 'shuroot-alistikhdam',
|
|
2616
|
+
'sharayit-estefadeh', 'sharayet',
|
|
2617
|
+
'niyam-shartein', 'shartein',
|
|
2618
|
+
'sharth-o',
|
|
2619
|
+
'ketentuan', 'syarat-ketentuan',
|
|
2620
|
+
'dieu-khoan', 'dieu-khoan-su-dung',
|
|
2621
|
+
'khoapkamnot', 'ngeuankhai-kan-chai',
|
|
2622
|
+
];
|
|
2623
|
+
const imprintPatterns = [
|
|
2624
|
+
// Legally required in DE/AT/CH, common across DACH + EU
|
|
2625
|
+
'impressum', 'imprint', 'mentions-legales', 'aviso-legal',
|
|
2626
|
+
'note-legali', 'colofon', 'colophon', 'wettelijke-vermelding',
|
|
2627
|
+
'juridisk-information', 'oikeudellinen-huomautus',
|
|
2628
|
+
'aviso-legal-pt', 'noticia-legal',
|
|
2629
|
+
'pravni-udaje', 'pravne-informacie',
|
|
2630
|
+
'yasal-bildirim', 'yasal-uyari',
|
|
2631
|
+
'informacje-prawne',
|
|
2632
|
+
'hukuki-bilgiler',
|
|
2633
|
+
'impresum',
|
|
2634
|
+
];
|
|
2635
|
+
const cookiePatterns = [
|
|
2636
|
+
'cookie', 'cookies', 'cookiebeleid', 'cookie-policy',
|
|
2637
|
+
'politique-cookies', 'politica-cookies', 'politica-de-cookies',
|
|
2638
|
+
'cookierichtlinie', 'cookie-einstellungen',
|
|
2639
|
+
'kekse', 'cookie-instellingen',
|
|
2640
|
+
'soubory-cookie', 'sukromie-cookie',
|
|
2641
|
+
'cerezler', 'gizlilik-cerezler',
|
|
2642
|
+
'pliki-cookie',
|
|
2643
|
+
'fichiers-cookie',
|
|
2644
|
+
'kukit',
|
|
2645
|
+
];
|
|
2646
|
+
const buildSelector = (patterns) => patterns.map((p) => `a[href*="${p}" i]`).join(', ');
|
|
2647
|
+
const privacyLink = $(buildSelector(privacyPatterns));
|
|
2648
|
+
const termsLink = $(buildSelector(termsPatterns));
|
|
2649
|
+
const imprintLink = $(buildSelector(imprintPatterns));
|
|
2650
|
+
const cookieLink = $(buildSelector(cookiePatterns));
|
|
2474
2651
|
|
|
2475
2652
|
maxScore += 5;
|
|
2476
|
-
|
|
2653
|
+
const legalSignals = [];
|
|
2654
|
+
if (privacyLink.length > 0) legalSignals.push('privacy');
|
|
2655
|
+
if (termsLink.length > 0) legalSignals.push('terms');
|
|
2656
|
+
if (imprintLink.length > 0) legalSignals.push('imprint');
|
|
2657
|
+
if (cookieLink.length > 0) legalSignals.push('cookies');
|
|
2658
|
+
|
|
2659
|
+
if (legalSignals.length >= 2) {
|
|
2477
2660
|
score += 5;
|
|
2478
|
-
checks.push({ status: 'pass', label:
|
|
2661
|
+
checks.push({ status: 'pass', label: `Legal pages linked (${legalSignals.length})`, detail: `Detected: ${legalSignals.join(', ')}` });
|
|
2662
|
+
} else if (legalSignals.length === 1) {
|
|
2663
|
+
score += 3;
|
|
2664
|
+
checks.push({ status: 'warn', label: `Only one legal page linked (${legalSignals[0]})`, detail: 'Add the others (privacy, terms, imprint, cookies) for full trust signals. Heuristic may miss non-Latin scripts — check evidence payload.' });
|
|
2479
2665
|
} else {
|
|
2480
|
-
checks.push({ status: 'info', label: 'No
|
|
2666
|
+
checks.push({ status: 'info', label: 'No legal page links detected by heuristic', detail: 'If the site is non-English, verify via the footerLinks evidence payload before treating as missing.' });
|
|
2481
2667
|
}
|
|
2482
2668
|
|
|
2483
2669
|
// E-E-A-T Experience Signals (10 pts)
|
|
@@ -2539,7 +2725,7 @@ function checkEntity($, jsonLdData) {
|
|
|
2539
2725
|
const hasPhone = /(\+?\d{1,3}[-.\s]?)?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{3,4}/.test(bodyText);
|
|
2540
2726
|
const hasEmail = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z]{2,}\b/i.test(bodyText);
|
|
2541
2727
|
const hasAddress = $('[itemprop="address"], [class*="address"], address').length > 0;
|
|
2542
|
-
const hasContactPage =
|
|
2728
|
+
const hasContactPage = contactLink.length > 0;
|
|
2543
2729
|
const contactSignals = (hasPhone ? 1 : 0) + (hasEmail ? 1 : 0) + (hasAddress ? 1 : 0) + (hasContactPage ? 1 : 0);
|
|
2544
2730
|
maxScore += 5;
|
|
2545
2731
|
if (contactSignals >= 3) {
|
|
@@ -2820,6 +3006,10 @@ function checkPerformance($) {
|
|
|
2820
3006
|
// CHECK CATEGORY 10: Agent Interactivity (WebMCP + UCP)
|
|
2821
3007
|
// ---------------------------------------------------------------------------
|
|
2822
3008
|
|
|
3009
|
+
// LATEST_UCP_VERSION: gating threshold for 2026-04-08 spec additions
|
|
3010
|
+
// (signing_keys, order webhook_url, etc. become required at this version).
|
|
3011
|
+
const LATEST_UCP_VERSION = '2026-04-08';
|
|
3012
|
+
|
|
2823
3013
|
function checkWebMCP($, pageType, ucpData) {
|
|
2824
3014
|
const checks = [];
|
|
2825
3015
|
let score = 0;
|
|
@@ -2985,7 +3175,7 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
2985
3175
|
const name = input.attr('name');
|
|
2986
3176
|
const type = input.attr('type');
|
|
2987
3177
|
const id = input.attr('id');
|
|
2988
|
-
const label = id ? $(`label[for="${id}"]`).length > 0 : false;
|
|
3178
|
+
const label = id ? $(`label[for="${id.replace(/(["\\])/g, '\\$1')}"]`).length > 0 : false;
|
|
2989
3179
|
const ariaLabel = input.attr('aria-label');
|
|
2990
3180
|
const placeholder = input.attr('placeholder');
|
|
2991
3181
|
|
|
@@ -3164,6 +3354,40 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3164
3354
|
const capabilities = capsArray; // Already normalized above
|
|
3165
3355
|
const transportKeys = ['rest', 'mcp', 'a2a', 'embedded'];
|
|
3166
3356
|
|
|
3357
|
+
// UCP CHECK 2.5: Cache Headers (only when caller passed response headers)
|
|
3358
|
+
const ucpHeaders = ucpData && ucpData.headers ? ucpData.headers : null;
|
|
3359
|
+
if (ucpHeaders) {
|
|
3360
|
+
const headerLookup = (n) => {
|
|
3361
|
+
const lower = n.toLowerCase();
|
|
3362
|
+
for (const k of Object.keys(ucpHeaders)) {
|
|
3363
|
+
if (k.toLowerCase() === lower) return String(ucpHeaders[k] || '');
|
|
3364
|
+
}
|
|
3365
|
+
return '';
|
|
3366
|
+
};
|
|
3367
|
+
const ct = headerLookup('content-type').toLowerCase();
|
|
3368
|
+
const cc = headerLookup('cache-control').toLowerCase();
|
|
3369
|
+
const ctOk = ct.startsWith('application/json');
|
|
3370
|
+
const ccTokens = cc.split(',').map(s => s.trim());
|
|
3371
|
+
const hasPublic = ccTokens.includes('public');
|
|
3372
|
+
const hasBadDirective = ccTokens.some(t => t === 'private' || t === 'no-store' || t === 'no-cache');
|
|
3373
|
+
const maxAgeMatch = cc.match(/max-age=(\d+)/);
|
|
3374
|
+
const maxAge = maxAgeMatch ? parseInt(maxAgeMatch[1], 10) : -1;
|
|
3375
|
+
const ccOk = hasPublic && !hasBadDirective && maxAge >= 60;
|
|
3376
|
+
maxScore += 5;
|
|
3377
|
+
if (ctOk && ccOk) {
|
|
3378
|
+
score += 5;
|
|
3379
|
+
checks.push({ status: 'pass', label: 'UCP profile cache headers OK', detail: `Content-Type application/json with Cache-Control: public, max-age=${maxAge}` });
|
|
3380
|
+
} else {
|
|
3381
|
+
score += 2;
|
|
3382
|
+
const issues = [];
|
|
3383
|
+
if (!ctOk) issues.push(`content-type "${ct || 'missing'}" (expected application/json)`);
|
|
3384
|
+
if (!hasPublic) issues.push('cache-control missing "public"');
|
|
3385
|
+
if (hasBadDirective) issues.push('cache-control contains private/no-store/no-cache');
|
|
3386
|
+
if (maxAge < 60) issues.push(`max-age=${maxAge >= 0 ? maxAge : 'missing'} (expected >=60)`);
|
|
3387
|
+
checks.push({ status: 'warn', label: 'UCP profile cache headers need attention', detail: issues.slice(0, 3).join('; '), found: issues });
|
|
3388
|
+
}
|
|
3389
|
+
}
|
|
3390
|
+
|
|
3167
3391
|
// UCP CHECK 2: Profile Completeness
|
|
3168
3392
|
let completenessIssues = [];
|
|
3169
3393
|
if (!versionDatePattern.test(version)) completenessIssues.push('version not date-formatted (expected YYYY-MM-DD)');
|
|
@@ -3193,25 +3417,94 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3193
3417
|
checks.push({ status: 'warn', label: `UCP profile has ${completenessIssues.length} issue(s)`, detail: completenessIssues.slice(0, 3).join('; '), found: completenessIssues.slice(0, 5) });
|
|
3194
3418
|
}
|
|
3195
3419
|
|
|
3196
|
-
// UCP CHECK 3: Capability Coverage
|
|
3420
|
+
// UCP CHECK 3: Capability Coverage (synced with extension processUCPProfile)
|
|
3197
3421
|
const capNames = capabilities.map(c => c.name || '');
|
|
3198
3422
|
const coreCapabilities = {
|
|
3199
3423
|
'dev.ucp.shopping.checkout': 'Checkout',
|
|
3200
3424
|
'dev.ucp.shopping.identity_linking': 'Identity Linking',
|
|
3201
3425
|
'dev.ucp.shopping.order': 'Order Management',
|
|
3426
|
+
'dev.ucp.shopping.cart': 'Cart',
|
|
3202
3427
|
};
|
|
3428
|
+
// Catalog has sub-capabilities; credit if any match the catalog prefix.
|
|
3429
|
+
const hasCatalog = capNames.some(n => n.startsWith('dev.ucp.shopping.catalog'));
|
|
3203
3430
|
const presentCore = Object.keys(coreCapabilities).filter(c => capNames.includes(c));
|
|
3204
|
-
|
|
3431
|
+
if (hasCatalog) presentCore.push('dev.ucp.shopping.catalog');
|
|
3432
|
+
const missingEntries = Object.entries(coreCapabilities).filter(([k]) => !capNames.includes(k));
|
|
3433
|
+
if (!hasCatalog) missingEntries.push(['dev.ucp.shopping.catalog', 'Catalog']);
|
|
3434
|
+
const missingCore = missingEntries.map(([, v]) => v);
|
|
3435
|
+
const totalCore = Object.keys(coreCapabilities).length + 1; // +1 for Catalog
|
|
3205
3436
|
|
|
3206
3437
|
maxScore += 10;
|
|
3207
|
-
if (presentCore.length ===
|
|
3438
|
+
if (presentCore.length === totalCore) {
|
|
3208
3439
|
score += 10;
|
|
3209
|
-
checks.push({ status: 'pass', label:
|
|
3440
|
+
checks.push({ status: 'pass', label: `All ${totalCore} core UCP capabilities declared`, detail: 'Checkout, Identity Linking, Order Management, Cart, and Catalog' });
|
|
3210
3441
|
} else if (presentCore.length > 0) {
|
|
3211
3442
|
score += 5;
|
|
3212
|
-
checks.push({ status: 'warn', label: `${presentCore.length}
|
|
3443
|
+
checks.push({ status: 'warn', label: `${presentCore.length}/${totalCore} core UCP capabilities declared`, detail: `Missing: ${missingCore.join(', ')}`, found: presentCore });
|
|
3213
3444
|
} else {
|
|
3214
|
-
checks.push({ status: 'info', label: 'No core UCP capabilities declared', detail: 'Consider adding checkout, identity_linking, and
|
|
3445
|
+
checks.push({ status: 'info', label: 'No core UCP capabilities declared', detail: 'Consider adding checkout, identity_linking, order, cart, and catalog capabilities' });
|
|
3446
|
+
}
|
|
3447
|
+
|
|
3448
|
+
// 2026-04-08 spec gating: declared version >= 2026-04-08?
|
|
3449
|
+
const isV2 = versionDatePattern.test(version) && version >= LATEST_UCP_VERSION;
|
|
3450
|
+
|
|
3451
|
+
// UCP CHECK 3.5: Signing Keys (RFC 9421 ES256, mandatory in 2026-04-08)
|
|
3452
|
+
const signingKeys = Array.isArray(profile.signing_keys) ? profile.signing_keys : null;
|
|
3453
|
+
if (signingKeys && signingKeys.length > 0) {
|
|
3454
|
+
const malformed = signingKeys.filter(k => !k || !k.kid || k.kty !== 'EC' || k.crv !== 'P-256' || !k.x || !k.y);
|
|
3455
|
+
maxScore += 10;
|
|
3456
|
+
if (malformed.length === 0) {
|
|
3457
|
+
score += 10;
|
|
3458
|
+
checks.push({ status: 'pass', label: `${signingKeys.length} UCP signing key(s) declared`, detail: 'Profile advertises EC P-256 JWK(s) for RFC 9421 message signing' });
|
|
3459
|
+
} else {
|
|
3460
|
+
score += 3;
|
|
3461
|
+
checks.push({ status: 'warn', label: `${malformed.length}/${signingKeys.length} UCP signing key(s) malformed`, detail: 'Each key must have kid, kty=EC, crv=P-256, x, y', found: malformed.map(k => k && k.kid ? k.kid : '<missing kid>').slice(0, 5) });
|
|
3462
|
+
}
|
|
3463
|
+
} else if (isV2) {
|
|
3464
|
+
maxScore += 10;
|
|
3465
|
+
score += 3;
|
|
3466
|
+
checks.push({ status: 'warn', label: 'UCP signing keys missing', detail: 'UCP 2026-04-08 mandates RFC 9421 ES256 signatures; profile must publish signing_keys[]' });
|
|
3467
|
+
} else {
|
|
3468
|
+
checks.push({ status: 'info', label: 'UCP signing keys not declared', detail: 'UCP 2026-04-08 will require signing_keys[]; consider adding for forward compatibility' });
|
|
3469
|
+
}
|
|
3470
|
+
|
|
3471
|
+
// UCP CHECK 3.6: Catalog Sub-Capability Coverage
|
|
3472
|
+
const catalogCaps = capabilities.filter(c => (c.name || '').startsWith('dev.ucp.shopping.catalog'));
|
|
3473
|
+
if (catalogCaps.length > 0) {
|
|
3474
|
+
const subs = ['search', 'lookup', 'get_product'];
|
|
3475
|
+
const presentSubs = subs.filter(s => catalogCaps.some(c => c.name === `dev.ucp.shopping.catalog.${s}` || c.name === 'dev.ucp.shopping.catalog'));
|
|
3476
|
+
const missingSubs = subs.filter(s => !presentSubs.includes(s));
|
|
3477
|
+
maxScore += 5;
|
|
3478
|
+
if (missingSubs.length === 0) {
|
|
3479
|
+
score += 5;
|
|
3480
|
+
checks.push({ status: 'pass', label: 'Catalog capability fully declared', detail: 'search, lookup, and get_product sub-capabilities all present' });
|
|
3481
|
+
} else {
|
|
3482
|
+
score += 3;
|
|
3483
|
+
checks.push({ status: 'warn', label: `Catalog declared with ${presentSubs.length}/${subs.length} sub-capabilities`, detail: `Missing: ${missingSubs.join(', ')}`, found: missingSubs });
|
|
3484
|
+
}
|
|
3485
|
+
} else if (capNames.includes('dev.ucp.shopping.cart')) {
|
|
3486
|
+
checks.push({ status: 'info', label: 'Cart declared without Catalog', detail: 'Consider adding catalog.search / catalog.lookup so agents can discover products before adding to cart' });
|
|
3487
|
+
}
|
|
3488
|
+
|
|
3489
|
+
// UCP CHECK 3.7: Order Webhook URL (required in 2026-04-08)
|
|
3490
|
+
const orderCap = capabilities.find(c => c.name === 'dev.ucp.shopping.order');
|
|
3491
|
+
if (orderCap) {
|
|
3492
|
+
const webhookUrl = orderCap.config && orderCap.config.webhook_url;
|
|
3493
|
+
if (webhookUrl && typeof webhookUrl === 'string' && webhookUrl.startsWith('https://')) {
|
|
3494
|
+
maxScore += 5;
|
|
3495
|
+
score += 5;
|
|
3496
|
+
checks.push({ status: 'pass', label: 'Order webhook URL declared', detail: 'config.webhook_url is HTTPS, enabling real-time order updates' });
|
|
3497
|
+
} else if (webhookUrl) {
|
|
3498
|
+
maxScore += 5;
|
|
3499
|
+
score += 2;
|
|
3500
|
+
checks.push({ status: 'warn', label: 'Order webhook URL is not HTTPS', detail: 'config.webhook_url must use https://' });
|
|
3501
|
+
} else if (isV2) {
|
|
3502
|
+
maxScore += 5;
|
|
3503
|
+
score += 2;
|
|
3504
|
+
checks.push({ status: 'warn', label: 'Order webhook URL missing', detail: 'UCP 2026-04-08 requires config.webhook_url on the order capability for real-time updates' });
|
|
3505
|
+
} else {
|
|
3506
|
+
checks.push({ status: 'info', label: 'Order webhook URL not declared', detail: 'UCP 2026-04-08 will require config.webhook_url on order capabilities' });
|
|
3507
|
+
}
|
|
3215
3508
|
}
|
|
3216
3509
|
|
|
3217
3510
|
// UCP CHECK 4: Extension Support
|
|
@@ -3238,6 +3531,20 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3238
3531
|
}
|
|
3239
3532
|
}
|
|
3240
3533
|
|
|
3534
|
+
// Cart-specific transport recommendation (2026-04-08 adds embedded binding).
|
|
3535
|
+
if (capNames.includes('dev.ucp.shopping.cart')) {
|
|
3536
|
+
const cartTransports = new Set(allTransports.map(t => t.transport));
|
|
3537
|
+
const cartRecommended = cartTransports.has('embedded') || cartTransports.has('mcp');
|
|
3538
|
+
maxScore += 3;
|
|
3539
|
+
if (cartRecommended) {
|
|
3540
|
+
score += 3;
|
|
3541
|
+
checks.push({ status: 'pass', label: 'Cart capability has embedded or MCP transport', detail: 'UCP 2026-04-08 recommends embedded or MCP transport for cart capability' });
|
|
3542
|
+
} else {
|
|
3543
|
+
score += 1;
|
|
3544
|
+
checks.push({ status: 'warn', label: 'Cart capability missing embedded/MCP transport', detail: 'UCP 2026-04-08 recommends adding an embedded transport binding for cart so agents can hand off to checkout' });
|
|
3545
|
+
}
|
|
3546
|
+
}
|
|
3547
|
+
|
|
3241
3548
|
if (allTransports.length > 1) {
|
|
3242
3549
|
maxScore += 10;
|
|
3243
3550
|
const httpsTransports = allTransports.filter(t => (t.endpoint || '').startsWith('https://') && (t.schema || '').startsWith('https://'));
|
|
@@ -3284,8 +3591,8 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3284
3591
|
// UCP CHECK 8: Page-Type-Specific Recommendations
|
|
3285
3592
|
const commercePageTypes = ['product', 'ecommerce', 'saas', 'local-business'];
|
|
3286
3593
|
const ucpRecommendations = {
|
|
3287
|
-
'product': 'Should have checkout +
|
|
3288
|
-
'ecommerce': 'Should have checkout +
|
|
3594
|
+
'product': 'Should have checkout + cart + catalog + fulfillment capabilities',
|
|
3595
|
+
'ecommerce': 'Should have checkout + cart + catalog; consider identity linking for personalization',
|
|
3289
3596
|
'saas': 'Should have checkout for subscription/trial flows',
|
|
3290
3597
|
'local-business': 'Consider checkout for booking/purchasing services',
|
|
3291
3598
|
'homepage': 'UCP profile should be accessible at domain root /.well-known/ucp',
|
|
@@ -3294,6 +3601,51 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3294
3601
|
if (ucpRecommendations[pageType]) {
|
|
3295
3602
|
checks.push({ status: 'pass', label: `UCP detected on ${pageType} page`, detail: ucpRecommendations[pageType] });
|
|
3296
3603
|
}
|
|
3604
|
+
|
|
3605
|
+
// UCP CHECK 9: Disclosure / Eligibility / Signals / Delegation feature advertisement (info-only).
|
|
3606
|
+
const advertisedFeatures = new Set();
|
|
3607
|
+
for (const cap of capabilities) {
|
|
3608
|
+
const feats = Array.isArray(cap.features) ? cap.features : [];
|
|
3609
|
+
for (const f of feats) {
|
|
3610
|
+
if (typeof f === 'string') advertisedFeatures.add(f);
|
|
3611
|
+
}
|
|
3612
|
+
}
|
|
3613
|
+
const trackedFeatures = ['eligibility_claims', 'signals', 'disclosure_messages', 'link_delegation'];
|
|
3614
|
+
const presentFeats = trackedFeatures.filter(f => advertisedFeatures.has(f));
|
|
3615
|
+
if (presentFeats.length > 0) {
|
|
3616
|
+
checks.push({ status: 'info', label: `${presentFeats.length} UCP feature(s) advertised`, detail: `Capabilities advertise: ${presentFeats.join(', ')}`, found: presentFeats });
|
|
3617
|
+
} else {
|
|
3618
|
+
checks.push({ status: 'info', label: 'No optional UCP features advertised', detail: 'eligibility_claims, signals, disclosure_messages, and link_delegation are optional but improve agent trust negotiation' });
|
|
3619
|
+
}
|
|
3620
|
+
|
|
3621
|
+
// UCP CHECK 10: Spec Version Currency (info-only).
|
|
3622
|
+
if (versionDatePattern.test(version)) {
|
|
3623
|
+
if (version === LATEST_UCP_VERSION) {
|
|
3624
|
+
checks.push({ status: 'info', label: 'UCP version is current', detail: `Profile declares the latest known UCP version (${version})` });
|
|
3625
|
+
} else if (version < LATEST_UCP_VERSION) {
|
|
3626
|
+
checks.push({ status: 'info', label: 'UCP version is older than latest', detail: `Profile declares ${version}; latest known is ${LATEST_UCP_VERSION}` });
|
|
3627
|
+
} else {
|
|
3628
|
+
checks.push({ status: 'info', label: 'UCP version newer than checker knows', detail: `Profile declares ${version}; this checker is calibrated against ${LATEST_UCP_VERSION}` });
|
|
3629
|
+
}
|
|
3630
|
+
}
|
|
3631
|
+
|
|
3632
|
+
// UCP CHECK 11: A2A agent-card.json (only when profile advertises an a2a transport)
|
|
3633
|
+
const agentCard = ucpData && ucpData.agentCard;
|
|
3634
|
+
if (agentCard) {
|
|
3635
|
+
if (agentCard.exists && agentCard.valid) {
|
|
3636
|
+
maxScore += 3;
|
|
3637
|
+
score += 3;
|
|
3638
|
+
checks.push({ status: 'pass', label: 'A2A agent card found', detail: '/.well-known/agent-card.json is reachable and parses as JSON' });
|
|
3639
|
+
} else if (agentCard.exists && !agentCard.valid) {
|
|
3640
|
+
maxScore += 3;
|
|
3641
|
+
checks.push({ status: 'warn', label: 'A2A agent card not valid JSON', detail: '/.well-known/agent-card.json was reachable but did not parse as JSON' });
|
|
3642
|
+
} else if (agentCard.missing) {
|
|
3643
|
+
maxScore += 3;
|
|
3644
|
+
checks.push({ status: 'warn', label: 'A2A agent card not found', detail: 'Profile advertises an a2a transport but /.well-known/agent-card.json returns 404' });
|
|
3645
|
+
} else {
|
|
3646
|
+
checks.push({ status: 'info', label: 'A2A agent card unreachable', detail: agentCard.statusCode ? `HTTP ${agentCard.statusCode}` : 'fetch error' });
|
|
3647
|
+
}
|
|
3648
|
+
}
|
|
3297
3649
|
}
|
|
3298
3650
|
} else {
|
|
3299
3651
|
// No UCP profile found — informational only, no penalty
|
|
@@ -3301,10 +3653,15 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3301
3653
|
if (commercePageTypes.includes(pageType)) {
|
|
3302
3654
|
checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities via /.well-known/ucp' });
|
|
3303
3655
|
} else {
|
|
3304
|
-
checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities
|
|
3656
|
+
checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities - most relevant for commerce pages' });
|
|
3305
3657
|
}
|
|
3306
3658
|
}
|
|
3307
3659
|
|
|
3660
|
+
// Shopify dual-surface info shortcut (fires whether or not UCP profile exists).
|
|
3661
|
+
if (ucpData && ucpData.shopifyHosted) {
|
|
3662
|
+
checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
|
|
3663
|
+
}
|
|
3664
|
+
|
|
3308
3665
|
return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Agent Interactivity' };
|
|
3309
3666
|
}
|
|
3310
3667
|
|
|
@@ -4243,6 +4600,16 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
|
|
|
4243
4600
|
headings: { h1: [], h2: [] },
|
|
4244
4601
|
lang: null,
|
|
4245
4602
|
hasStructuredData: false,
|
|
4603
|
+
// Raw evidence for language-agnostic trust signal classification.
|
|
4604
|
+
// Populated by extractTrustSignals; consumers running inside an LLM can
|
|
4605
|
+
// reclassify legal / about / contact / imprint / cookies semantically
|
|
4606
|
+
// instead of relying on the heuristic pattern lists.
|
|
4607
|
+
evidence: {
|
|
4608
|
+
htmlLang: null,
|
|
4609
|
+
hreflangs: [],
|
|
4610
|
+
navLinks: [],
|
|
4611
|
+
footerLinks: [],
|
|
4612
|
+
},
|
|
4246
4613
|
};
|
|
4247
4614
|
|
|
4248
4615
|
if (!html) return result;
|
|
@@ -4265,6 +4632,9 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
|
|
|
4265
4632
|
const pageType = detectPageType($, schemaTypes, pathname);
|
|
4266
4633
|
result.pageType = pageType;
|
|
4267
4634
|
|
|
4635
|
+
// Extract language-agnostic trust signal evidence
|
|
4636
|
+
result.evidence = extractTrustSignals($);
|
|
4637
|
+
|
|
4268
4638
|
// Populate basic metadata fields (backward-compatible with old analyseHTML)
|
|
4269
4639
|
result.title = $('title').first().text().trim() || null;
|
|
4270
4640
|
result.lang = $('html').attr('lang') || null;
|
|
@@ -4391,6 +4761,7 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
|
|
|
4391
4761
|
async function checkGEO(domain, options = {}) {
|
|
4392
4762
|
const maxPages = options.maxPages ?? MAX_PAGES_PER_DOMAIN;
|
|
4393
4763
|
const skipCache = options.skipCache ?? false;
|
|
4764
|
+
const renderMode = options.renderMode ?? 'auto'; // 'static' | 'chrome' | 'auto'
|
|
4394
4765
|
|
|
4395
4766
|
// Check cache first (unless explicitly skipped)
|
|
4396
4767
|
if (!skipCache) {
|
|
@@ -4500,7 +4871,9 @@ async function checkGEO(domain, options = {}) {
|
|
|
4500
4871
|
[robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes] = await Promise.all([
|
|
4501
4872
|
throttledFetchUrl(robotsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
4502
4873
|
throttledFetchUrl(llmsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
4503
|
-
|
|
4874
|
+
renderMode === 'chrome'
|
|
4875
|
+
? chromeFetch(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} }))
|
|
4876
|
+
: throttledFetchUrl(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
4504
4877
|
throttledFetchUrl(sitemapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
4505
4878
|
throttledFetchUrl(ucpUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
|
|
4506
4879
|
]);
|
|
@@ -4509,6 +4882,31 @@ async function checkGEO(domain, options = {}) {
|
|
|
4509
4882
|
return output;
|
|
4510
4883
|
}
|
|
4511
4884
|
|
|
4885
|
+
// Auto fallback: if static fetch couldn't get the homepage (bot block,
|
|
4886
|
+
// WAF, or network error), retry via headless Chrome. Record that we
|
|
4887
|
+
// rendered via Chrome so downstream multi-page crawl uses it too.
|
|
4888
|
+
let useChromeForCrawl = renderMode === 'chrome';
|
|
4889
|
+
if (renderMode === 'auto' && looksBotBlocked(homepageRes)) {
|
|
4890
|
+
const chromeRes = await chromeFetch(homepageUrl).catch(() => null);
|
|
4891
|
+
const chromeOk =
|
|
4892
|
+
chromeRes &&
|
|
4893
|
+
typeof chromeRes.statusCode === 'number' &&
|
|
4894
|
+
chromeRes.statusCode >= 200 &&
|
|
4895
|
+
chromeRes.statusCode < 300 &&
|
|
4896
|
+
chromeRes.body;
|
|
4897
|
+
if (chromeOk) {
|
|
4898
|
+
homepageRes = chromeRes;
|
|
4899
|
+
useChromeForCrawl = true;
|
|
4900
|
+
output.renderMode = 'chrome-fallback';
|
|
4901
|
+
} else {
|
|
4902
|
+
output.renderMode = chromeRes && chromeRes.statusCode
|
|
4903
|
+
? `chrome-blocked-${chromeRes.statusCode}`
|
|
4904
|
+
: 'static-blocked';
|
|
4905
|
+
}
|
|
4906
|
+
} else {
|
|
4907
|
+
output.renderMode = renderMode === 'chrome' ? 'chrome' : 'static';
|
|
4908
|
+
}
|
|
4909
|
+
|
|
4512
4910
|
// --- robots.txt ---
|
|
4513
4911
|
try {
|
|
4514
4912
|
if (robotsRes.statusCode === 200 && robotsRes.body) {
|
|
@@ -4539,15 +4937,63 @@ async function checkGEO(domain, options = {}) {
|
|
|
4539
4937
|
const profile = JSON.parse(ucpRes.body);
|
|
4540
4938
|
output.ucpProfile.exists = true;
|
|
4541
4939
|
output.ucpProfile.content = profile;
|
|
4940
|
+
output.ucpProfile.headers = ucpRes.headers || {};
|
|
4542
4941
|
}
|
|
4543
4942
|
} catch (err) {
|
|
4544
4943
|
output.ucpProfile.error = err.message;
|
|
4545
4944
|
}
|
|
4546
4945
|
|
|
4946
|
+
// --- /.well-known/agent-card.json (A2A discovery; only meaningful when profile advertises a2a) ---
|
|
4947
|
+
try {
|
|
4948
|
+
const services = output.ucpProfile.content && output.ucpProfile.content.ucp && output.ucpProfile.content.ucp.services;
|
|
4949
|
+
const advertisesA2a = services && Object.values(services).some(svc => svc && typeof svc === 'object' && svc.a2a);
|
|
4950
|
+
if (advertisesA2a) {
|
|
4951
|
+
const cardUrl = `${baseUrl}/.well-known/agent-card.json`;
|
|
4952
|
+
const cardRes = await throttledFetchUrl(cardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null }));
|
|
4953
|
+
if (cardRes.statusCode === 200 && cardRes.body) {
|
|
4954
|
+
try {
|
|
4955
|
+
JSON.parse(cardRes.body);
|
|
4956
|
+
output.ucpProfile.agentCard = { url: cardUrl, exists: true, valid: true };
|
|
4957
|
+
} catch {
|
|
4958
|
+
output.ucpProfile.agentCard = { url: cardUrl, exists: true, valid: false };
|
|
4959
|
+
}
|
|
4960
|
+
} else if (cardRes.statusCode === 404) {
|
|
4961
|
+
output.ucpProfile.agentCard = { url: cardUrl, exists: false, missing: true };
|
|
4962
|
+
} else {
|
|
4963
|
+
output.ucpProfile.agentCard = { url: cardUrl, exists: false, statusCode: cardRes.statusCode };
|
|
4964
|
+
}
|
|
4965
|
+
}
|
|
4966
|
+
} catch (err) {
|
|
4967
|
+
output.ucpProfile.agentCard = { error: err.message };
|
|
4968
|
+
}
|
|
4969
|
+
|
|
4970
|
+
// --- Shopify host detection (for dual-surface info shortcut in checks) ---
|
|
4971
|
+
try {
|
|
4972
|
+
const homepageHeaders = homepageRes && homepageRes.headers ? homepageRes.headers : {};
|
|
4973
|
+
const headerLookup = (n) => {
|
|
4974
|
+
const lower = n.toLowerCase();
|
|
4975
|
+
for (const k of Object.keys(homepageHeaders)) {
|
|
4976
|
+
if (k.toLowerCase() === lower) return String(homepageHeaders[k] || '');
|
|
4977
|
+
}
|
|
4978
|
+
return '';
|
|
4979
|
+
};
|
|
4980
|
+
const host = (cleanDomain || '').toLowerCase();
|
|
4981
|
+
const isShopifyDomain = host.endsWith('.myshopify.com') || host === 'myshopify.com';
|
|
4982
|
+
const isShopifyByHeader = !!(headerLookup('x-shopid') || headerLookup('x-shardid') || headerLookup('x-shopify-stage') || headerLookup('powered-by').toLowerCase().includes('shopify'));
|
|
4983
|
+
output.ucpProfile.shopifyHosted = isShopifyDomain || isShopifyByHeader;
|
|
4984
|
+
} catch (err) {
|
|
4985
|
+
output.ucpProfile.shopifyHosted = false;
|
|
4986
|
+
}
|
|
4987
|
+
|
|
4547
4988
|
// --- Homepage (full 16-category analysis) ---
|
|
4548
4989
|
try {
|
|
4549
4990
|
output.homepage.statusCode = homepageRes.statusCode;
|
|
4550
|
-
|
|
4991
|
+
// Accept any 2xx that came back with a body. In practice Chrome often
|
|
4992
|
+
// surfaces 202 (Amazon) or 206 responses that still carry the rendered
|
|
4993
|
+
// document; analysing those is strictly better than dropping the score.
|
|
4994
|
+
const homepageUsable = homepageRes.statusCode >= 200 &&
|
|
4995
|
+
homepageRes.statusCode < 300 && !!homepageRes.body;
|
|
4996
|
+
if (homepageUsable) {
|
|
4551
4997
|
output.homepage.analysis = analyseHTML(
|
|
4552
4998
|
homepageRes.body,
|
|
4553
4999
|
cleanDomain,
|
|
@@ -4633,14 +5079,18 @@ async function checkGEO(domain, options = {}) {
|
|
|
4633
5079
|
error: output.homepage.error,
|
|
4634
5080
|
});
|
|
4635
5081
|
|
|
5082
|
+
// Chrome fetches are serial (one tab at a time), static fetches run in batches.
|
|
5083
|
+
const concurrency = useChromeForCrawl ? 1 : MAX_CONCURRENT_PAGE_FETCHES;
|
|
4636
5084
|
// Fetch remaining pages in controlled batches
|
|
4637
|
-
for (let i = 0; i < pagesToCrawl.length; i += MAX_CONCURRENT_PAGE_FETCHES) {
|
|
4638
|
-
const batch = pagesToCrawl.slice(i, i + MAX_CONCURRENT_PAGE_FETCHES);
|
|
5085
|
+
for (let i = 0; i < pagesToCrawl.length; i += concurrency) {
|
|
5086
|
+
const batch = pagesToCrawl.slice(i, i + concurrency);
|
|
4639
5087
|
const batchResults = await Promise.all(
|
|
4640
5088
|
batch.map(async (pageUrl) => {
|
|
4641
5089
|
try {
|
|
4642
|
-
const res = await throttledFetchUrl(pageUrl, PAGE_CRAWL_TIMEOUT_MS);
|
|
4643
|
-
|
|
5090
|
+
const res = useChromeForCrawl
|
|
5091
|
+
? await chromeFetch(pageUrl, PAGE_CRAWL_TIMEOUT_MS)
|
|
5092
|
+
: await throttledFetchUrl(pageUrl, PAGE_CRAWL_TIMEOUT_MS);
|
|
5093
|
+
if (res.statusCode >= 200 && res.statusCode < 300 && res.body) {
|
|
4644
5094
|
// Determine pathname for page type detection
|
|
4645
5095
|
let pathname = '/';
|
|
4646
5096
|
try { pathname = new URL(pageUrl).pathname; } catch {}
|