glippy-mcp 0.1.0 → 0.3.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -9,6 +9,19 @@ import http from 'node:http';
9
9
  import https from 'node:https';
10
10
  import { URL } from 'node:url';
11
11
  import * as cheerio from 'cheerio';
12
+ import { chromeFetch } from './chrome-fetcher.js';
13
+
14
/**
 * HTTP status codes treated as "the server is refusing or stalling a
 * bot-shaped request" instead of serving real content. 202 (Amazon) and
 * 400 (Douglas) are included because in practice those are only returned
 * to non-browser user agents.
 */
const BOT_BLOCK_STATUS = new Set([202, 400, 401, 403, 407, 429, 503]);

/**
 * Decide whether a fetch result looks like a bot block / failed fetch.
 *
 * A result counts as blocked when it is missing, carries no status code,
 * carries one of the BOT_BLOCK_STATUS codes, or is a 2xx with a falsy body.
 *
 * @param {{statusCode?: number|null, body?: *}|null|undefined} res
 *   Fetch result; only `statusCode` and `body` are inspected.
 * @returns {boolean} true when the response should be treated as blocked.
 */
function looksBotBlocked(res) {
  // A missing result and a missing status are handled by the same check.
  const status = res?.statusCode;
  if (status == null) return true;
  if (BOT_BLOCK_STATUS.has(status)) return true;

  // A "successful" response that delivered nothing is still useless to us.
  const isSuccess = status >= 200 && status < 300;
  return isSuccess && !res.body;
}
12
25
 
13
26
  // ---------------------------------------------------------------------------
14
27
  // Constants
@@ -750,8 +763,11 @@ function detectPageType($, schemaTypes, pathname) {
750
763
  if (['Article', 'NewsArticle', 'BlogPosting', 'TechArticle'].some((t) => schemaTypes.has(t))) return 'article';
751
764
  if (['LocalBusiness', 'Restaurant', 'Store'].some((t) => schemaTypes.has(t))) return 'local-business';
752
765
 
753
- // Heuristic: homepage detection
754
- if (pathname === '/' || pathname === '/index.html' || pathname === '/index.php' || pathname === '') return 'homepage';
766
+ // Heuristic: homepage detection (including language/locale-prefixed homepages like /en/, /de-DE/, /nl/)
767
+ // Strip a leading language or locale segment before checking so multilingual
768
+ // sites hosting their homepage at /en/ or /nl-NL/ are not treated as generic.
769
+ const normalizedPath = pathname.replace(/^\/[a-z]{2}(?:[-_][a-z]{2,3})?\/?$/i, '/');
770
+ if (normalizedPath === '/' || normalizedPath === '/index.html' || normalizedPath === '/index.php' || normalizedPath === '') return 'homepage';
755
771
 
756
772
  // Heuristic: FAQ page via DOM
757
773
  const faqIndicators = $('[class*="faq"], [id*="faq"], details, [class*="accordion"]');
@@ -1439,7 +1455,7 @@ function checkAccessibility($) {
1439
1455
  const unlabeledInputList = [];
1440
1456
  inputs.each((_, el) => {
1441
1457
  const id = $(el).attr('id');
1442
- const hasLabel = id && $(`label[for="${id}"]`).length > 0;
1458
+ const hasLabel = id && $(`label[for="${id.replace(/(["\\])/g, '\\$1')}"]`).length > 0;
1443
1459
  const hasAriaLabel = $(el).attr('aria-label') || $(el).attr('aria-labelledby');
1444
1460
  const wrappedInLabel = $(el).closest('label').length > 0;
1445
1461
  const hasPlaceholder = $(el).attr('placeholder');
@@ -1885,6 +1901,63 @@ function checkMachineReadability($, robotsTxtData, llmsTxtData, responseHeaders)
1885
1901
  return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Machine Readability' };
1886
1902
  }
1887
1903
 
1904
+ // ---------------------------------------------------------------------------
1905
+ // Trust signal evidence extractor
1906
+ // ---------------------------------------------------------------------------
1907
+
1908
/**
 * Gather raw link evidence from navigation-like and footer-like regions,
 * together with the page's declared language signals.
 *
 * No classification happens here: anchor text and href are returned as-is
 * (trimmed, whitespace-collapsed, length-capped) so that a caller can decide
 * which links are about / contact / legal / imprint / cookies pages in
 * whatever language the site uses, rather than relying on hardcoded
 * pattern lists.
 *
 * @param {cheerio.CheerioAPI} $
 * @returns {{
 *   htmlLang: string|null,
 *   hreflangs: string[],
 *   navLinks: Array<{href: string, text: string, rel: string|null}>,
 *   footerLinks: Array<{href: string, text: string, rel: string|null}>,
 * }}
 */
function extractTrustSignals($) {
  const PER_LOCATION_LIMIT = 80;
  const MAX_TEXT_LEN = 120;
  const NAV_SELECTOR = 'header, nav, [role="navigation"], [class*="menu" i], [class*="navigation" i], [id*="menu" i], [id*="nav" i]';
  const FOOTER_SELECTOR = 'footer, [role="contentinfo"], [class*="footer" i], [id*="footer" i]';

  // Empty hrefs, in-page fragments and javascript: pseudo-links carry no
  // navigational evidence.
  const isSkippableHref = (href) =>
    !href || href.startsWith('#') || href.toLowerCase().startsWith('javascript:');

  // Collect up to PER_LOCATION_LIMIT unique (href, text) pairs from the
  // anchors found beneath the elements matched by `selector`.
  const collect = (selector) => {
    const links = [];
    const seenKeys = new Set();

    $(selector).find('a[href]').each((_, anchor) => {
      // Returning false stops the iteration once the cap is reached.
      if (links.length >= PER_LOCATION_LIMIT) return false;

      const $anchor = $(anchor);
      const href = ($anchor.attr('href') || '').trim();
      if (isSkippableHref(href)) return undefined;

      const text = $anchor.text().trim().replace(/\s+/g, ' ').slice(0, MAX_TEXT_LEN);
      const dedupeKey = `${href}|${text}`;
      if (!seenKeys.has(dedupeKey)) {
        seenKeys.add(dedupeKey);
        links.push({ href, text, rel: $anchor.attr('rel') || null });
      }
      return undefined;
    });

    return links;
  };

  const navLinks = collect(NAV_SELECTOR);
  const footerLinks = collect(FOOTER_SELECTOR);

  const hreflangs = [];
  $('link[rel="alternate"][hreflang]').each((_, link) => {
    const value = $(link).attr('hreflang');
    if (value) hreflangs.push(value);
  });

  const htmlLang = $('html').attr('lang') || null;

  return { htmlLang, hreflangs, navLinks, footerLinks };
}
1960
+
1888
1961
  // ---------------------------------------------------------------------------
1889
1962
  // CHECK CATEGORY 7: Entity & Authority
1890
1963
  // ---------------------------------------------------------------------------
@@ -2464,20 +2537,133 @@ function checkEntity($, jsonLdData) {
2464
2537
  checks.push({ status: 'info', label: 'No About/Contact page links detected', detail: 'Link to organizational info for E-E-A-T' });
2465
2538
  }
2466
2539
 
2467
- // Privacy / Terms links (trust signals)
2468
- const privacyPatterns = ['privacy', 'datenschutz', 'privacidad', 'privacidade', 'confidentialite', 'riservatezza', 'privacybeleid', 'integritet', 'gizlilik'];
2469
- const termsPatterns = ['terms', 'voorwaarden', 'agb', 'condiciones', 'termos', 'conditions-generales', 'condizioni', 'villkor', 'regulamin', 'kosullar'];
2470
- const privacySelector = privacyPatterns.map((p) => `a[href*="${p}"]`).join(', ');
2471
- const termsSelector = termsPatterns.map((p) => `a[href*="${p}"]`).join(', ');
2472
- const privacyLink = $(privacySelector);
2473
- const termsLink = $(termsSelector);
2540
+ // Privacy / Terms / Imprint / Cookies links (trust signals, multi-language)
2541
+ // Hardcoded patterns are a fallback heuristic; the extractTrustSignals
2542
+ // evidence payload on the analysis result lets LLM callers reclassify
2543
+ // semantically in any language.
2544
+ const privacyPatterns = [
2545
+ // English
2546
+ 'privacy', 'privacy-policy',
2547
+ // Latin-alphabet European languages
2548
+ 'datenschutz', 'privatsphaere', 'privatsphare',
2549
+ 'privacidad', 'politica-de-privacidad',
2550
+ 'privacidade', 'politica-de-privacidade',
2551
+ 'confidentialite', 'politique-de-confidentialite', 'vie-privee',
2552
+ 'riservatezza', 'privacy-italia',
2553
+ 'privacybeleid', 'privacyverklaring',
2554
+ 'integritet', 'integritetspolicy',
2555
+ 'personvern',
2556
+ 'tietosuoja', 'yksityisyys',
2557
+ 'persondata', 'fortrolighed',
2558
+ 'adatvedelem',
2559
+ 'prywatnosc', 'polityka-prywatnosci',
2560
+ 'soukromi', 'ochrana-osobnich-udaju',
2561
+ 'ochrana-osobnych-udajov',
2562
+ 'confidentialitate',
2563
+ 'poverljivost', 'privatnost',
2564
+ 'zasebnost',
2565
+ 'privatesia', 'privatnost-hr',
2566
+ 'konfidentsialnost', 'privatnost-ba',
2567
+ 'gizlilik',
2568
+ 'privatumas', 'privatuma',
2569
+ 'yasslilik',
2570
+ // Romanized non-Latin
2571
+ 'konfidentsialnost', 'konfidentsialnost-ua', 'konfidentsialnist',
2572
+ 'idiotikotita', 'aporrito', 'prostasia-dedomenon',
2573
+ 'puraibashi', 'puraibasi-porisi',
2574
+ 'geinsajeongbobo', 'gaeinjeongbo',
2575
+ 'yinsi', 'yinsi-zhengce',
2576
+ 'khasusiyat', 'khososi',
2577
+ 'harimiyat',
2578
+ 'niji-gopaniyata', 'gopaniyata',
2579
+ 'gopniyata',
2580
+ 'kerahasiaan', 'privasi',
2581
+ 'quyen-rieng-tu', 'bao-mat',
2582
+ 'khwam-pen-suanto', 'nayobai-khwampensuntu',
2583
+ ];
2584
+ const termsPatterns = [
2585
+ // English
2586
+ 'terms', 'terms-of-service', 'terms-of-use', 'terms-conditions', 'tos',
2587
+ // Latin-alphabet European languages
2588
+ 'agb', 'nutzungsbedingungen', 'geschaeftsbedingungen',
2589
+ 'condiciones', 'terminos', 'terminos-y-condiciones', 'condiciones-de-uso',
2590
+ 'termos', 'termos-de-uso', 'termos-de-servico',
2591
+ 'conditions-generales', 'cgu', 'cgv', 'mentions-contrat',
2592
+ 'condizioni', 'termini', 'termini-e-condizioni',
2593
+ 'voorwaarden', 'algemene-voorwaarden', 'gebruiksvoorwaarden',
2594
+ 'villkor', 'anvandarvillkor', 'allmanna-villkor',
2595
+ 'brukervilkar', 'vilkar',
2596
+ 'kayttoehdot', 'ehdot',
2597
+ 'betingelser', 'vilkaar', 'handelsbetingelser',
2598
+ 'szerzodesi-feltetelek', 'felhasznalasi-feltetelek',
2599
+ 'regulamin', 'warunki',
2600
+ 'podminky', 'vseobecne-obchodni-podminky', 'obchodni-podminky',
2601
+ 'obchodne-podmienky',
2602
+ 'termeni-si-conditii', 'termeni',
2603
+ 'uslovi', 'uvjeti', 'pogoji',
2604
+ 'kosullar', 'kullanim-kosullari',
2605
+ 'salygos', 'naudojimo-salygos',
2606
+ 'noteikumi',
2607
+ 'kasutustingimused',
2608
+ // Romanized non-Latin
2609
+ 'usloviya', 'usloviya-ispolzovaniya', 'pravila',
2610
+ 'umovy', 'pravyla',
2611
+ 'oroi', 'oroi-xrisis',
2612
+ 'riyoukiyaku', 'riyou-kiyaku', 'kiyaku',
2613
+ 'iyong-yakgwan', 'yakgwan',
2614
+ 'tiaokuan', 'fuwu-tiaokuan', 'shiyong-tiaokuan',
2615
+ 'shuruth', 'shuroot-alistikhdam',
2616
+ 'sharayit-estefadeh', 'sharayet',
2617
+ 'niyam-shartein', 'shartein',
2618
+ 'sharth-o',
2619
+ 'ketentuan', 'syarat-ketentuan',
2620
+ 'dieu-khoan', 'dieu-khoan-su-dung',
2621
+ 'khoapkamnot', 'ngeuankhai-kan-chai',
2622
+ ];
2623
+ const imprintPatterns = [
2624
+ // Legally required in DE/AT/CH, common across DACH + EU
2625
+ 'impressum', 'imprint', 'mentions-legales', 'aviso-legal',
2626
+ 'note-legali', 'colofon', 'colophon', 'wettelijke-vermelding',
2627
+ 'juridisk-information', 'oikeudellinen-huomautus',
2628
+ 'aviso-legal-pt', 'noticia-legal',
2629
+ 'pravni-udaje', 'pravne-informacie',
2630
+ 'yasal-bildirim', 'yasal-uyari',
2631
+ 'informacje-prawne',
2632
+ 'hukuki-bilgiler',
2633
+ 'impresum',
2634
+ ];
2635
+ const cookiePatterns = [
2636
+ 'cookie', 'cookies', 'cookiebeleid', 'cookie-policy',
2637
+ 'politique-cookies', 'politica-cookies', 'politica-de-cookies',
2638
+ 'cookierichtlinie', 'cookie-einstellungen',
2639
+ 'kekse', 'cookie-instellingen',
2640
+ 'soubory-cookie', 'sukromie-cookie',
2641
+ 'cerezler', 'gizlilik-cerezler',
2642
+ 'pliki-cookie',
2643
+ 'fichiers-cookie',
2644
+ 'kukit',
2645
+ ];
2646
+ const buildSelector = (patterns) => patterns.map((p) => `a[href*="${p}" i]`).join(', ');
2647
+ const privacyLink = $(buildSelector(privacyPatterns));
2648
+ const termsLink = $(buildSelector(termsPatterns));
2649
+ const imprintLink = $(buildSelector(imprintPatterns));
2650
+ const cookieLink = $(buildSelector(cookiePatterns));
2474
2651
 
2475
2652
  maxScore += 5;
2476
- if (privacyLink.length > 0 || termsLink.length > 0) {
2653
+ const legalSignals = [];
2654
+ if (privacyLink.length > 0) legalSignals.push('privacy');
2655
+ if (termsLink.length > 0) legalSignals.push('terms');
2656
+ if (imprintLink.length > 0) legalSignals.push('imprint');
2657
+ if (cookieLink.length > 0) legalSignals.push('cookies');
2658
+
2659
+ if (legalSignals.length >= 2) {
2477
2660
  score += 5;
2478
- checks.push({ status: 'pass', label: 'Legal pages linked', detail: `Privacy: ${privacyLink.length > 0 ? 'yes' : 'no'}, Terms: ${termsLink.length > 0 ? 'yes' : 'no'}` });
2661
+ checks.push({ status: 'pass', label: `Legal pages linked (${legalSignals.length})`, detail: `Detected: ${legalSignals.join(', ')}` });
2662
+ } else if (legalSignals.length === 1) {
2663
+ score += 3;
2664
+ checks.push({ status: 'warn', label: `Only one legal page linked (${legalSignals[0]})`, detail: 'Add the others (privacy, terms, imprint, cookies) for full trust signals. Heuristic may miss non-Latin scripts — check evidence payload.' });
2479
2665
  } else {
2480
- checks.push({ status: 'info', label: 'No privacy/terms links detected', detail: null });
2666
+ checks.push({ status: 'info', label: 'No legal page links detected by heuristic', detail: 'If the site is non-English, verify via the footerLinks evidence payload before treating as missing.' });
2481
2667
  }
2482
2668
 
2483
2669
  // E-E-A-T Experience Signals (10 pts)
@@ -2539,7 +2725,7 @@ function checkEntity($, jsonLdData) {
2539
2725
  const hasPhone = /(\+?\d{1,3}[-.\s]?)?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{3,4}/.test(bodyText);
2540
2726
  const hasEmail = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z]{2,}\b/i.test(bodyText);
2541
2727
  const hasAddress = $('[itemprop="address"], [class*="address"], address').length > 0;
2542
- const hasContactPage = $('a[href*="contact"]').length > 0;
2728
+ const hasContactPage = contactLink.length > 0;
2543
2729
  const contactSignals = (hasPhone ? 1 : 0) + (hasEmail ? 1 : 0) + (hasAddress ? 1 : 0) + (hasContactPage ? 1 : 0);
2544
2730
  maxScore += 5;
2545
2731
  if (contactSignals >= 3) {
@@ -2820,6 +3006,10 @@ function checkPerformance($) {
2820
3006
  // CHECK CATEGORY 10: Agent Interactivity (WebMCP + UCP)
2821
3007
  // ---------------------------------------------------------------------------
2822
3008
 
3009
+ // LATEST_UCP_VERSION: newest UCP spec date this checker is calibrated against. Profiles whose date-formatted
3010
+ // version is >= this value (plain string comparison) are warned and scored when signing_keys or the order webhook_url is missing.
3011
+ const LATEST_UCP_VERSION = '2026-04-08';
3012
+
2823
3013
  function checkWebMCP($, pageType, ucpData) {
2824
3014
  const checks = [];
2825
3015
  let score = 0;
@@ -2985,7 +3175,7 @@ function checkWebMCP($, pageType, ucpData) {
2985
3175
  const name = input.attr('name');
2986
3176
  const type = input.attr('type');
2987
3177
  const id = input.attr('id');
2988
- const label = id ? $(`label[for="${id}"]`).length > 0 : false;
3178
+ const label = id ? $(`label[for="${id.replace(/(["\\])/g, '\\$1')}"]`).length > 0 : false;
2989
3179
  const ariaLabel = input.attr('aria-label');
2990
3180
  const placeholder = input.attr('placeholder');
2991
3181
 
@@ -3164,6 +3354,40 @@ function checkWebMCP($, pageType, ucpData) {
3164
3354
  const capabilities = capsArray; // Already normalized above
3165
3355
  const transportKeys = ['rest', 'mcp', 'a2a', 'embedded'];
3166
3356
 
3357
+ // UCP CHECK 2.5: Cache Headers (only when caller passed response headers)
3358
+ const ucpHeaders = ucpData && ucpData.headers ? ucpData.headers : null;
3359
+ if (ucpHeaders) {
3360
+ const headerLookup = (n) => {
3361
+ const lower = n.toLowerCase();
3362
+ for (const k of Object.keys(ucpHeaders)) {
3363
+ if (k.toLowerCase() === lower) return String(ucpHeaders[k] || '');
3364
+ }
3365
+ return '';
3366
+ };
3367
+ const ct = headerLookup('content-type').toLowerCase();
3368
+ const cc = headerLookup('cache-control').toLowerCase();
3369
+ const ctOk = ct.startsWith('application/json');
3370
+ const ccTokens = cc.split(',').map(s => s.trim());
3371
+ const hasPublic = ccTokens.includes('public');
3372
+ const hasBadDirective = ccTokens.some(t => t === 'private' || t === 'no-store' || t === 'no-cache');
3373
+ const maxAgeMatch = cc.match(/max-age=(\d+)/);
3374
+ const maxAge = maxAgeMatch ? parseInt(maxAgeMatch[1], 10) : -1;
3375
+ const ccOk = hasPublic && !hasBadDirective && maxAge >= 60;
3376
+ maxScore += 5;
3377
+ if (ctOk && ccOk) {
3378
+ score += 5;
3379
+ checks.push({ status: 'pass', label: 'UCP profile cache headers OK', detail: `Content-Type application/json with Cache-Control: public, max-age=${maxAge}` });
3380
+ } else {
3381
+ score += 2;
3382
+ const issues = [];
3383
+ if (!ctOk) issues.push(`content-type "${ct || 'missing'}" (expected application/json)`);
3384
+ if (!hasPublic) issues.push('cache-control missing "public"');
3385
+ if (hasBadDirective) issues.push('cache-control contains private/no-store/no-cache');
3386
+ if (maxAge < 60) issues.push(`max-age=${maxAge >= 0 ? maxAge : 'missing'} (expected >=60)`);
3387
+ checks.push({ status: 'warn', label: 'UCP profile cache headers need attention', detail: issues.slice(0, 3).join('; '), found: issues });
3388
+ }
3389
+ }
3390
+
3167
3391
  // UCP CHECK 2: Profile Completeness
3168
3392
  let completenessIssues = [];
3169
3393
  if (!versionDatePattern.test(version)) completenessIssues.push('version not date-formatted (expected YYYY-MM-DD)');
@@ -3193,25 +3417,94 @@ function checkWebMCP($, pageType, ucpData) {
3193
3417
  checks.push({ status: 'warn', label: `UCP profile has ${completenessIssues.length} issue(s)`, detail: completenessIssues.slice(0, 3).join('; '), found: completenessIssues.slice(0, 5) });
3194
3418
  }
3195
3419
 
3196
- // UCP CHECK 3: Capability Coverage
3420
+ // UCP CHECK 3: Capability Coverage (synced with extension processUCPProfile)
3197
3421
  const capNames = capabilities.map(c => c.name || '');
3198
3422
  const coreCapabilities = {
3199
3423
  'dev.ucp.shopping.checkout': 'Checkout',
3200
3424
  'dev.ucp.shopping.identity_linking': 'Identity Linking',
3201
3425
  'dev.ucp.shopping.order': 'Order Management',
3426
+ 'dev.ucp.shopping.cart': 'Cart',
3202
3427
  };
3428
+ // Catalog has sub-capabilities; credit if any match the catalog prefix.
3429
+ const hasCatalog = capNames.some(n => n.startsWith('dev.ucp.shopping.catalog'));
3203
3430
  const presentCore = Object.keys(coreCapabilities).filter(c => capNames.includes(c));
3204
- const missingCore = Object.entries(coreCapabilities).filter(([k]) => !capNames.includes(k)).map(([, v]) => v);
3431
+ if (hasCatalog) presentCore.push('dev.ucp.shopping.catalog');
3432
+ const missingEntries = Object.entries(coreCapabilities).filter(([k]) => !capNames.includes(k));
3433
+ if (!hasCatalog) missingEntries.push(['dev.ucp.shopping.catalog', 'Catalog']);
3434
+ const missingCore = missingEntries.map(([, v]) => v);
3435
+ const totalCore = Object.keys(coreCapabilities).length + 1; // +1 for Catalog
3205
3436
 
3206
3437
  maxScore += 10;
3207
- if (presentCore.length === 3) {
3438
+ if (presentCore.length === totalCore) {
3208
3439
  score += 10;
3209
- checks.push({ status: 'pass', label: 'All 3 core UCP capabilities declared', detail: 'Checkout, Identity Linking, and Order Management' });
3440
+ checks.push({ status: 'pass', label: `All ${totalCore} core UCP capabilities declared`, detail: 'Checkout, Identity Linking, Order Management, Cart, and Catalog' });
3210
3441
  } else if (presentCore.length > 0) {
3211
3442
  score += 5;
3212
- checks.push({ status: 'warn', label: `${presentCore.length}/3 core UCP capabilities declared`, detail: `Missing: ${missingCore.join(', ')}`, found: presentCore });
3443
+ checks.push({ status: 'warn', label: `${presentCore.length}/${totalCore} core UCP capabilities declared`, detail: `Missing: ${missingCore.join(', ')}`, found: presentCore });
3213
3444
  } else {
3214
- checks.push({ status: 'info', label: 'No core UCP capabilities declared', detail: 'Consider adding checkout, identity_linking, and order capabilities' });
3445
+ checks.push({ status: 'info', label: 'No core UCP capabilities declared', detail: 'Consider adding checkout, identity_linking, order, cart, and catalog capabilities' });
3446
+ }
3447
+
3448
+ // 2026-04-08 spec gating: declared version >= 2026-04-08?
3449
+ const isV2 = versionDatePattern.test(version) && version >= LATEST_UCP_VERSION;
3450
+
3451
+ // UCP CHECK 3.5: Signing Keys (RFC 9421 ES256, mandatory in 2026-04-08)
3452
+ const signingKeys = Array.isArray(profile.signing_keys) ? profile.signing_keys : null;
3453
+ if (signingKeys && signingKeys.length > 0) {
3454
+ const malformed = signingKeys.filter(k => !k || !k.kid || k.kty !== 'EC' || k.crv !== 'P-256' || !k.x || !k.y);
3455
+ maxScore += 10;
3456
+ if (malformed.length === 0) {
3457
+ score += 10;
3458
+ checks.push({ status: 'pass', label: `${signingKeys.length} UCP signing key(s) declared`, detail: 'Profile advertises EC P-256 JWK(s) for RFC 9421 message signing' });
3459
+ } else {
3460
+ score += 3;
3461
+ checks.push({ status: 'warn', label: `${malformed.length}/${signingKeys.length} UCP signing key(s) malformed`, detail: 'Each key must have kid, kty=EC, crv=P-256, x, y', found: malformed.map(k => k && k.kid ? k.kid : '<missing kid>').slice(0, 5) });
3462
+ }
3463
+ } else if (isV2) {
3464
+ maxScore += 10;
3465
+ score += 3;
3466
+ checks.push({ status: 'warn', label: 'UCP signing keys missing', detail: 'UCP 2026-04-08 mandates RFC 9421 ES256 signatures; profile must publish signing_keys[]' });
3467
+ } else {
3468
+ checks.push({ status: 'info', label: 'UCP signing keys not declared', detail: 'UCP 2026-04-08 will require signing_keys[]; consider adding for forward compatibility' });
3469
+ }
3470
+
3471
+ // UCP CHECK 3.6: Catalog Sub-Capability Coverage
3472
+ const catalogCaps = capabilities.filter(c => (c.name || '').startsWith('dev.ucp.shopping.catalog'));
3473
+ if (catalogCaps.length > 0) {
3474
+ const subs = ['search', 'lookup', 'get_product'];
3475
+ const presentSubs = subs.filter(s => catalogCaps.some(c => c.name === `dev.ucp.shopping.catalog.${s}` || c.name === 'dev.ucp.shopping.catalog'));
3476
+ const missingSubs = subs.filter(s => !presentSubs.includes(s));
3477
+ maxScore += 5;
3478
+ if (missingSubs.length === 0) {
3479
+ score += 5;
3480
+ checks.push({ status: 'pass', label: 'Catalog capability fully declared', detail: 'search, lookup, and get_product sub-capabilities all present' });
3481
+ } else {
3482
+ score += 3;
3483
+ checks.push({ status: 'warn', label: `Catalog declared with ${presentSubs.length}/${subs.length} sub-capabilities`, detail: `Missing: ${missingSubs.join(', ')}`, found: missingSubs });
3484
+ }
3485
+ } else if (capNames.includes('dev.ucp.shopping.cart')) {
3486
+ checks.push({ status: 'info', label: 'Cart declared without Catalog', detail: 'Consider adding catalog.search / catalog.lookup so agents can discover products before adding to cart' });
3487
+ }
3488
+
3489
+ // UCP CHECK 3.7: Order Webhook URL (required in 2026-04-08)
3490
+ const orderCap = capabilities.find(c => c.name === 'dev.ucp.shopping.order');
3491
+ if (orderCap) {
3492
+ const webhookUrl = orderCap.config && orderCap.config.webhook_url;
3493
+ if (webhookUrl && typeof webhookUrl === 'string' && webhookUrl.startsWith('https://')) {
3494
+ maxScore += 5;
3495
+ score += 5;
3496
+ checks.push({ status: 'pass', label: 'Order webhook URL declared', detail: 'config.webhook_url is HTTPS, enabling real-time order updates' });
3497
+ } else if (webhookUrl) {
3498
+ maxScore += 5;
3499
+ score += 2;
3500
+ checks.push({ status: 'warn', label: 'Order webhook URL is not HTTPS', detail: 'config.webhook_url must use https://' });
3501
+ } else if (isV2) {
3502
+ maxScore += 5;
3503
+ score += 2;
3504
+ checks.push({ status: 'warn', label: 'Order webhook URL missing', detail: 'UCP 2026-04-08 requires config.webhook_url on the order capability for real-time updates' });
3505
+ } else {
3506
+ checks.push({ status: 'info', label: 'Order webhook URL not declared', detail: 'UCP 2026-04-08 will require config.webhook_url on order capabilities' });
3507
+ }
3215
3508
  }
3216
3509
 
3217
3510
  // UCP CHECK 4: Extension Support
@@ -3238,6 +3531,20 @@ function checkWebMCP($, pageType, ucpData) {
3238
3531
  }
3239
3532
  }
3240
3533
 
3534
+ // Cart-specific transport recommendation (2026-04-08 adds embedded binding).
3535
+ if (capNames.includes('dev.ucp.shopping.cart')) {
3536
+ const cartTransports = new Set(allTransports.map(t => t.transport));
3537
+ const cartRecommended = cartTransports.has('embedded') || cartTransports.has('mcp');
3538
+ maxScore += 3;
3539
+ if (cartRecommended) {
3540
+ score += 3;
3541
+ checks.push({ status: 'pass', label: 'Cart capability has embedded or MCP transport', detail: 'UCP 2026-04-08 recommends embedded or MCP transport for cart capability' });
3542
+ } else {
3543
+ score += 1;
3544
+ checks.push({ status: 'warn', label: 'Cart capability missing embedded/MCP transport', detail: 'UCP 2026-04-08 recommends adding an embedded transport binding for cart so agents can hand off to checkout' });
3545
+ }
3546
+ }
3547
+
3241
3548
  if (allTransports.length > 1) {
3242
3549
  maxScore += 10;
3243
3550
  const httpsTransports = allTransports.filter(t => (t.endpoint || '').startsWith('https://') && (t.schema || '').startsWith('https://'));
@@ -3284,8 +3591,8 @@ function checkWebMCP($, pageType, ucpData) {
3284
3591
  // UCP CHECK 8: Page-Type-Specific Recommendations
3285
3592
  const commercePageTypes = ['product', 'ecommerce', 'saas', 'local-business'];
3286
3593
  const ucpRecommendations = {
3287
- 'product': 'Should have checkout + fulfillment + discount capabilities',
3288
- 'ecommerce': 'Should have checkout + fulfillment; consider identity linking for personalization',
3594
+ 'product': 'Should have checkout + cart + catalog + fulfillment capabilities',
3595
+ 'ecommerce': 'Should have checkout + cart + catalog; consider identity linking for personalization',
3289
3596
  'saas': 'Should have checkout for subscription/trial flows',
3290
3597
  'local-business': 'Consider checkout for booking/purchasing services',
3291
3598
  'homepage': 'UCP profile should be accessible at domain root /.well-known/ucp',
@@ -3294,6 +3601,51 @@ function checkWebMCP($, pageType, ucpData) {
3294
3601
  if (ucpRecommendations[pageType]) {
3295
3602
  checks.push({ status: 'pass', label: `UCP detected on ${pageType} page`, detail: ucpRecommendations[pageType] });
3296
3603
  }
3604
+
3605
+ // UCP CHECK 9: Disclosure / Eligibility / Signals / Delegation feature advertisement (info-only).
3606
+ const advertisedFeatures = new Set();
3607
+ for (const cap of capabilities) {
3608
+ const feats = Array.isArray(cap.features) ? cap.features : [];
3609
+ for (const f of feats) {
3610
+ if (typeof f === 'string') advertisedFeatures.add(f);
3611
+ }
3612
+ }
3613
+ const trackedFeatures = ['eligibility_claims', 'signals', 'disclosure_messages', 'link_delegation'];
3614
+ const presentFeats = trackedFeatures.filter(f => advertisedFeatures.has(f));
3615
+ if (presentFeats.length > 0) {
3616
+ checks.push({ status: 'info', label: `${presentFeats.length} UCP feature(s) advertised`, detail: `Capabilities advertise: ${presentFeats.join(', ')}`, found: presentFeats });
3617
+ } else {
3618
+ checks.push({ status: 'info', label: 'No optional UCP features advertised', detail: 'eligibility_claims, signals, disclosure_messages, and link_delegation are optional but improve agent trust negotiation' });
3619
+ }
3620
+
3621
+ // UCP CHECK 10: Spec Version Currency (info-only).
3622
+ if (versionDatePattern.test(version)) {
3623
+ if (version === LATEST_UCP_VERSION) {
3624
+ checks.push({ status: 'info', label: 'UCP version is current', detail: `Profile declares the latest known UCP version (${version})` });
3625
+ } else if (version < LATEST_UCP_VERSION) {
3626
+ checks.push({ status: 'info', label: 'UCP version is older than latest', detail: `Profile declares ${version}; latest known is ${LATEST_UCP_VERSION}` });
3627
+ } else {
3628
+ checks.push({ status: 'info', label: 'UCP version newer than checker knows', detail: `Profile declares ${version}; this checker is calibrated against ${LATEST_UCP_VERSION}` });
3629
+ }
3630
+ }
3631
+
3632
+ // UCP CHECK 11: A2A agent-card.json (only when profile advertises an a2a transport)
3633
+ const agentCard = ucpData && ucpData.agentCard;
3634
+ if (agentCard) {
3635
+ if (agentCard.exists && agentCard.valid) {
3636
+ maxScore += 3;
3637
+ score += 3;
3638
+ checks.push({ status: 'pass', label: 'A2A agent card found', detail: '/.well-known/agent-card.json is reachable and parses as JSON' });
3639
+ } else if (agentCard.exists && !agentCard.valid) {
3640
+ maxScore += 3;
3641
+ checks.push({ status: 'warn', label: 'A2A agent card not valid JSON', detail: '/.well-known/agent-card.json was reachable but did not parse as JSON' });
3642
+ } else if (agentCard.missing) {
3643
+ maxScore += 3;
3644
+ checks.push({ status: 'warn', label: 'A2A agent card not found', detail: 'Profile advertises an a2a transport but /.well-known/agent-card.json returns 404' });
3645
+ } else {
3646
+ checks.push({ status: 'info', label: 'A2A agent card unreachable', detail: agentCard.statusCode ? `HTTP ${agentCard.statusCode}` : 'fetch error' });
3647
+ }
3648
+ }
3297
3649
  }
3298
3650
  } else {
3299
3651
  // No UCP profile found — informational only, no penalty
@@ -3301,10 +3653,15 @@ function checkWebMCP($, pageType, ucpData) {
3301
3653
  if (commercePageTypes.includes(pageType)) {
3302
3654
  checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities via /.well-known/ucp' });
3303
3655
  } else {
3304
- checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities most relevant for commerce pages' });
3656
+ checks.push({ status: 'info', label: 'No UCP discovery file found', detail: 'UCP enables AI agents to discover commerce capabilities - most relevant for commerce pages' });
3305
3657
  }
3306
3658
  }
3307
3659
 
3660
+ // Shopify dual-surface info shortcut (fires whether or not UCP profile exists).
3661
+ if (ucpData && ucpData.shopifyHosted) {
3662
+ checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
3663
+ }
3664
+
3308
3665
  return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Agent Interactivity' };
3309
3666
  }
3310
3667
 
@@ -4243,6 +4600,16 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
4243
4600
  headings: { h1: [], h2: [] },
4244
4601
  lang: null,
4245
4602
  hasStructuredData: false,
4603
+ // Raw evidence for language-agnostic trust signal classification.
4604
+ // Populated by extractTrustSignals; consumers running inside an LLM can
4605
+ // reclassify legal / about / contact / imprint / cookies semantically
4606
+ // instead of relying on the heuristic pattern lists.
4607
+ evidence: {
4608
+ htmlLang: null,
4609
+ hreflangs: [],
4610
+ navLinks: [],
4611
+ footerLinks: [],
4612
+ },
4246
4613
  };
4247
4614
 
4248
4615
  if (!html) return result;
@@ -4265,6 +4632,9 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
4265
4632
  const pageType = detectPageType($, schemaTypes, pathname);
4266
4633
  result.pageType = pageType;
4267
4634
 
4635
+ // Extract language-agnostic trust signal evidence
4636
+ result.evidence = extractTrustSignals($);
4637
+
4268
4638
  // Populate basic metadata fields (backward-compatible with old analyseHTML)
4269
4639
  result.title = $('title').first().text().trim() || null;
4270
4640
  result.lang = $('html').attr('lang') || null;
@@ -4391,6 +4761,7 @@ function analyseHTML(html, domain, robotsTxtData, llmsTxtData, responseHeaders,
4391
4761
  async function checkGEO(domain, options = {}) {
4392
4762
  const maxPages = options.maxPages ?? MAX_PAGES_PER_DOMAIN;
4393
4763
  const skipCache = options.skipCache ?? false;
4764
+ const renderMode = options.renderMode ?? 'auto'; // 'static' | 'chrome' | 'auto'
4394
4765
 
4395
4766
  // Check cache first (unless explicitly skipped)
4396
4767
  if (!skipCache) {
@@ -4500,7 +4871,9 @@ async function checkGEO(domain, options = {}) {
4500
4871
  [robotsRes, llmsRes, homepageRes, sitemapRes, ucpRes] = await Promise.all([
4501
4872
  throttledFetchUrl(robotsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
4502
4873
  throttledFetchUrl(llmsUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
4503
- throttledFetchUrl(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} })),
4874
+ renderMode === 'chrome'
4875
+ ? chromeFetch(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} }))
4876
+ : throttledFetchUrl(homepageUrl).catch(() => ({ body: null, statusCode: null, headers: {} })),
4504
4877
  throttledFetchUrl(sitemapUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
4505
4878
  throttledFetchUrl(ucpUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null, headers: {} })),
4506
4879
  ]);
@@ -4509,6 +4882,31 @@ async function checkGEO(domain, options = {}) {
4509
4882
  return output;
4510
4883
  }
4511
4884
 
4885
+ // Auto fallback: if static fetch couldn't get the homepage (bot block,
4886
+ // WAF, or network error), retry via headless Chrome. Record that we
4887
+ // rendered via Chrome so downstream multi-page crawl uses it too.
4888
+ let useChromeForCrawl = renderMode === 'chrome';
4889
+ if (renderMode === 'auto' && looksBotBlocked(homepageRes)) {
4890
+ const chromeRes = await chromeFetch(homepageUrl).catch(() => null);
4891
+ const chromeOk =
4892
+ chromeRes &&
4893
+ typeof chromeRes.statusCode === 'number' &&
4894
+ chromeRes.statusCode >= 200 &&
4895
+ chromeRes.statusCode < 300 &&
4896
+ chromeRes.body;
4897
+ if (chromeOk) {
4898
+ homepageRes = chromeRes;
4899
+ useChromeForCrawl = true;
4900
+ output.renderMode = 'chrome-fallback';
4901
+ } else {
4902
+ output.renderMode = chromeRes && chromeRes.statusCode
4903
+ ? `chrome-blocked-${chromeRes.statusCode}`
4904
+ : 'static-blocked';
4905
+ }
4906
+ } else {
4907
+ output.renderMode = renderMode === 'chrome' ? 'chrome' : 'static';
4908
+ }
4909
+
4512
4910
  // --- robots.txt ---
4513
4911
  try {
4514
4912
  if (robotsRes.statusCode === 200 && robotsRes.body) {
@@ -4539,15 +4937,63 @@ async function checkGEO(domain, options = {}) {
4539
4937
  const profile = JSON.parse(ucpRes.body);
4540
4938
  output.ucpProfile.exists = true;
4541
4939
  output.ucpProfile.content = profile;
4940
+ output.ucpProfile.headers = ucpRes.headers || {};
4542
4941
  }
4543
4942
  } catch (err) {
4544
4943
  output.ucpProfile.error = err.message;
4545
4944
  }
4546
4945
 
4946
+ // --- /.well-known/agent-card.json (A2A discovery; only meaningful when profile advertises a2a) ---
4947
+ try {
4948
+ const services = output.ucpProfile.content && output.ucpProfile.content.ucp && output.ucpProfile.content.ucp.services;
4949
+ const advertisesA2a = services && Object.values(services).some(svc => svc && typeof svc === 'object' && svc.a2a);
4950
+ if (advertisesA2a) {
4951
+ const cardUrl = `${baseUrl}/.well-known/agent-card.json`;
4952
+ const cardRes = await throttledFetchUrl(cardUrl, FETCH_TIMEOUT_MS, MAX_TEXT_BODY_SIZE).catch(() => ({ body: null, statusCode: null }));
4953
+ if (cardRes.statusCode === 200 && cardRes.body) {
4954
+ try {
4955
+ JSON.parse(cardRes.body);
4956
+ output.ucpProfile.agentCard = { url: cardUrl, exists: true, valid: true };
4957
+ } catch {
4958
+ output.ucpProfile.agentCard = { url: cardUrl, exists: true, valid: false };
4959
+ }
4960
+ } else if (cardRes.statusCode === 404) {
4961
+ output.ucpProfile.agentCard = { url: cardUrl, exists: false, missing: true };
4962
+ } else {
4963
+ output.ucpProfile.agentCard = { url: cardUrl, exists: false, statusCode: cardRes.statusCode };
4964
+ }
4965
+ }
4966
+ } catch (err) {
4967
+ output.ucpProfile.agentCard = { error: err.message };
4968
+ }
4969
+
4970
+ // --- Shopify host detection (for dual-surface info shortcut in checks) ---
4971
+ try {
4972
+ const homepageHeaders = homepageRes && homepageRes.headers ? homepageRes.headers : {};
4973
+ const headerLookup = (n) => {
4974
+ const lower = n.toLowerCase();
4975
+ for (const k of Object.keys(homepageHeaders)) {
4976
+ if (k.toLowerCase() === lower) return String(homepageHeaders[k] || '');
4977
+ }
4978
+ return '';
4979
+ };
4980
+ const host = (cleanDomain || '').toLowerCase();
4981
+ const isShopifyDomain = host.endsWith('.myshopify.com') || host === 'myshopify.com';
4982
+ const isShopifyByHeader = !!(headerLookup('x-shopid') || headerLookup('x-shardid') || headerLookup('x-shopify-stage') || headerLookup('powered-by').toLowerCase().includes('shopify'));
4983
+ output.ucpProfile.shopifyHosted = isShopifyDomain || isShopifyByHeader;
4984
+ } catch (err) {
4985
+ output.ucpProfile.shopifyHosted = false;
4986
+ }
4987
+
4547
4988
  // --- Homepage (full 16-category analysis) ---
4548
4989
  try {
4549
4990
  output.homepage.statusCode = homepageRes.statusCode;
4550
- if (homepageRes.statusCode === 200 && homepageRes.body) {
4991
+ // Accept any 2xx that came back with a body. In practice Chrome often
4992
+ // surfaces 202 (Amazon) or 206 responses that still carry the rendered
4993
+ // document; analysing those is strictly better than dropping the score.
4994
+ const homepageUsable = homepageRes.statusCode >= 200 &&
4995
+ homepageRes.statusCode < 300 && !!homepageRes.body;
4996
+ if (homepageUsable) {
4551
4997
  output.homepage.analysis = analyseHTML(
4552
4998
  homepageRes.body,
4553
4999
  cleanDomain,
@@ -4633,14 +5079,18 @@ async function checkGEO(domain, options = {}) {
4633
5079
  error: output.homepage.error,
4634
5080
  });
4635
5081
 
5082
+ // Chrome fetches run serially (one tab at a time); static fetches run in batches of MAX_CONCURRENT_PAGE_FETCHES.
5083
+ const concurrency = useChromeForCrawl ? 1 : MAX_CONCURRENT_PAGE_FETCHES;
4636
5084
  // Fetch remaining pages in controlled batches
4637
- for (let i = 0; i < pagesToCrawl.length; i += MAX_CONCURRENT_PAGE_FETCHES) {
4638
- const batch = pagesToCrawl.slice(i, i + MAX_CONCURRENT_PAGE_FETCHES);
5085
+ for (let i = 0; i < pagesToCrawl.length; i += concurrency) {
5086
+ const batch = pagesToCrawl.slice(i, i + concurrency);
4639
5087
  const batchResults = await Promise.all(
4640
5088
  batch.map(async (pageUrl) => {
4641
5089
  try {
4642
- const res = await throttledFetchUrl(pageUrl, PAGE_CRAWL_TIMEOUT_MS);
4643
- if (res.statusCode === 200 && res.body) {
5090
+ const res = useChromeForCrawl
5091
+ ? await chromeFetch(pageUrl, PAGE_CRAWL_TIMEOUT_MS)
5092
+ : await throttledFetchUrl(pageUrl, PAGE_CRAWL_TIMEOUT_MS);
5093
+ if (res.statusCode >= 200 && res.statusCode < 300 && res.body) {
4644
5094
  // Determine pathname for page type detection
4645
5095
  let pathname = '/';
4646
5096
  try { pathname = new URL(pageUrl).pathname; } catch {}