glippy-mcp 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/geo-checker.js +140 -20
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "glippy-mcp",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "MCP server for GEO (Generative Engine Optimization) analysis — check any domain's AI-readiness",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -757,10 +757,27 @@ function aggregatePageScores(pageResults) {
757
757
  * @returns {string} - One of: 'faq', 'product', 'article', 'local-business', 'homepage', 'ecommerce', 'saas', 'generic'.
758
758
  */
759
759
  function detectPageType($, schemaTypes, pathname) {
760
- // Check JSON-LD schema types first (most reliable signal)
761
- if (['FAQPage'].some((t) => schemaTypes.has(t))) return 'faq';
762
- if (['Product', 'Offer'].some((t) => schemaTypes.has(t))) return 'product';
760
+ // Check JSON-LD schema types first (most reliable signal).
761
+ // A page can carry FAQPage schema for a small FAQ section while being a long-form
762
+ // guide. Only classify as "faq" when FAQPage is the dominant structure - otherwise
763
+ // a 6,400-word guide with a FAQ at the bottom gets penalized as exceeding FAQ length.
764
+ const allH2s = $('h2');
765
+ const h2Count = allH2s.length;
766
+ let questionH2Count = 0;
767
+ allH2s.each((_, el) => {
768
+ const t = ($(el).text() || '').trim();
769
+ if (t.includes('?') || /^(how|what|why|when|where|who|which|can|do|does|is|are|should)\b/i.test(t)) {
770
+ questionH2Count++;
771
+ }
772
+ });
773
+ const isDominantlyFaq = h2Count > 0 && questionH2Count >= h2Count * 0.7;
774
+
775
+ if (schemaTypes.has('FAQPage') && isDominantlyFaq) return 'faq';
763
776
  if (['Article', 'NewsArticle', 'BlogPosting', 'TechArticle'].some((t) => schemaTypes.has(t))) return 'article';
777
+ // FAQPage schema present but page also has many topic-style H2s = guide with a FAQ section.
778
+ if (schemaTypes.has('FAQPage') && h2Count >= 6) return 'article';
779
+ if (schemaTypes.has('FAQPage')) return 'faq';
780
+ if (['Product', 'Offer'].some((t) => schemaTypes.has(t))) return 'product';
764
781
  if (['LocalBusiness', 'Restaurant', 'Store'].some((t) => schemaTypes.has(t))) return 'local-business';
765
782
 
766
783
  // Heuristic: homepage detection (including language/locale-prefixed homepages like /en/, /de-DE/, /nl/)
@@ -769,9 +786,10 @@ function detectPageType($, schemaTypes, pathname) {
769
786
  const normalizedPath = pathname.replace(/^\/[a-z]{2}(?:[-_][a-z]{2,3})?\/?$/i, '/');
770
787
  if (normalizedPath === '/' || normalizedPath === '/index.html' || normalizedPath === '/index.php' || normalizedPath === '') return 'homepage';
771
788
 
772
- // Heuristic: FAQ page via DOM
789
+ // Heuristic: FAQ page via DOM. Only treat as FAQ when FAQ-like elements dominate the
790
+ // structure - if the page has many topic H2s it's a guide that happens to include a FAQ.
773
791
  const faqIndicators = $('[class*="faq"], [id*="faq"], details, [class*="accordion"]');
774
- if (faqIndicators.length >= 3) return 'faq';
792
+ if (faqIndicators.length >= 3 && (h2Count < 6 || isDominantlyFaq)) return 'faq';
775
793
 
776
794
  // Heuristic: article via DOM
777
795
  const hasArticle = $('article').length > 0;
@@ -2014,7 +2032,29 @@ function checkEntity($, jsonLdData) {
2014
2032
  });
2015
2033
  }
2016
2034
 
2017
- // 6. JSON-LD schema author with quality check
2035
+ // 6. JSON-LD schema author with quality check.
2036
+ // Only treat `author` as the page author when it's attached to a content type
2037
+ // (Article, WebPage, Book, etc.) - NOT inside Review/Comment, where `author` is
2038
+ // the reviewer/commenter and shouldn't be credited to the page.
2039
+ const PAGE_AUTHOR_TYPES = new Set([
2040
+ 'Article', 'NewsArticle', 'BlogPosting', 'TechArticle', 'ScholarlyArticle', 'Report', 'OpinionNewsArticle',
2041
+ 'WebPage', 'AboutPage', 'CollectionPage', 'ItemPage', 'ProfilePage', 'QAPage', 'FAQPage',
2042
+ 'Book', 'Chapter', 'CreativeWork', 'CreativeWorkSeries', 'HowTo', 'Recipe', 'Course', 'LearningResource',
2043
+ 'VideoObject', 'AudioObject', 'PodcastEpisode', 'Podcast',
2044
+ 'DiscussionForumPosting', 'SocialMediaPosting',
2045
+ ]);
2046
+ const SKIP_AUTHOR_TYPES = new Set(['Review', 'Comment', 'UserComments', 'Rating']);
2047
+ const isContentType = (t) => {
2048
+ if (!t) return false;
2049
+ const types = Array.isArray(t) ? t : [t];
2050
+ return types.some((x) => PAGE_AUTHOR_TYPES.has(x));
2051
+ };
2052
+ const isSkipType = (t) => {
2053
+ if (!t) return false;
2054
+ const types = Array.isArray(t) ? t : [t];
2055
+ return types.some((x) => SKIP_AUTHOR_TYPES.has(x));
2056
+ };
2057
+
2018
2058
  let hasAuthorSchema = false;
2019
2059
  let hasAuthorSameAs = false;
2020
2060
  let hasPersonSchema = false;
@@ -2022,12 +2062,14 @@ function checkEntity($, jsonLdData) {
2022
2062
  try {
2023
2063
  const processSchema = (schema) => {
2024
2064
  if (!schema) return;
2025
- if (schema.author) {
2065
+ // Skip Review/Comment subtrees - their author is not the page author.
2066
+ if (isSkipType(schema['@type'])) return;
2067
+ if (schema.author && isContentType(schema['@type'])) {
2026
2068
  hasAuthorSchema = true;
2027
2069
  const authors = Array.isArray(schema.author) ? schema.author : [schema.author];
2028
2070
  authors.forEach((a) => {
2029
2071
  if (typeof a === 'string') authorNames.add(a);
2030
- else if (a.name) {
2072
+ else if (a && a.name) {
2031
2073
  authorNames.add(a.name);
2032
2074
  if (a.sameAs) hasAuthorSameAs = true;
2033
2075
  if (a['@type'] === 'Person') hasPersonSchema = true;
@@ -2038,6 +2080,13 @@ function checkEntity($, jsonLdData) {
2038
2080
  hasPersonSchema = true;
2039
2081
  if (schema.sameAs) hasAuthorSameAs = true;
2040
2082
  }
2083
+ // Recurse into common content-bearing fields, but skip review arrays.
2084
+ ['mainEntity', 'mainEntityOfPage', 'about', 'isPartOf', 'hasPart', 'workExample', 'exampleOfWork'].forEach((key) => {
2085
+ const val = schema[key];
2086
+ if (!val) return;
2087
+ if (Array.isArray(val)) val.forEach(processSchema);
2088
+ else if (typeof val === 'object') processSchema(val);
2089
+ });
2041
2090
  };
2042
2091
  if (Array.isArray(d)) d.forEach(processSchema);
2043
2092
  else if (d['@graph']) d['@graph'].forEach(processSchema);
@@ -2047,14 +2096,17 @@ function checkEntity($, jsonLdData) {
2047
2096
  if (hasAuthorSchema) authorSources.schema.push('JSON-LD author');
2048
2097
  if (hasPersonSchema) authorSources.schema.push('Person schema');
2049
2098
 
2050
- // 7. HTML byline elements - extended selectors
2099
+ // 7. HTML byline elements - extended selectors.
2100
+ // Exclude bylines inside review/comment/testimonial containers - they identify the
2101
+ // reviewer, not the page author.
2051
2102
  const bylineSelectors = [
2052
2103
  '[class*="author"]', '[rel="author"]', '[itemprop="author"]',
2053
2104
  '.byline', '.post-author', '.article-author', '.entry-author',
2054
2105
  '[data-author]', '[data-byline]',
2055
2106
  'address.author', '.writer', '.contributor',
2056
2107
  ].join(', ');
2057
- const authorByline = $(bylineSelectors).first();
2108
+ const reviewContextSel = '[itemtype*="Review"], [itemtype*="Comment"], .review, .reviews, .comment, .comments, .testimonial, .testimonials, [class*="review-"], [class*="reviews-"]';
2109
+ const authorByline = $(bylineSelectors).filter((_, el) => $(el).closest(reviewContextSel).length === 0).first();
2058
2110
  if (authorByline.length > 0) {
2059
2111
  const bylineText = (authorByline.text() || '').trim();
2060
2112
  if (bylineText && bylineText.length < 100) {
@@ -2070,8 +2122,9 @@ function checkEntity($, jsonLdData) {
2070
2122
  authorSources.html.push('address element');
2071
2123
  }
2072
2124
 
2073
- // 9. Author profile links
2074
- const authorLinks = $('a[href*="/author/"], a[href*="/writers/"], a[href*="/contributors/"], a[href*="/team/"], a[rel="author"]');
2125
+ // 9. Author profile links - skip review-context links (reviewer profile links).
2126
+ const authorLinks = $('a[href*="/author/"], a[href*="/writers/"], a[href*="/contributors/"], a[href*="/team/"], a[rel="author"]')
2127
+ .filter((_, el) => $(el).closest(reviewContextSel).length === 0);
2075
2128
  if (authorLinks.length > 0) {
2076
2129
  authorSources.links.push(`${authorLinks.length} author link(s)`);
2077
2130
  authorLinks.each((_, el) => {
@@ -3662,6 +3715,32 @@ function checkWebMCP($, pageType, ucpData) {
3662
3715
  checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
3663
3716
  }
3664
3717
 
3718
+ // Baseline credit for purely informational pages.
3719
+ // If the page has no forms, no WebMCP signals, no UCP profile, and no Shopify
3720
+ // surface, there's nothing for it to expose to agents - WebMCP/UCP are N/A here.
3721
+ // Without this, content-only pages are capped well below 100 even when there's
3722
+ // nothing to fix, dragging the overall score unfairly.
3723
+ const totalForms = $('form').length;
3724
+ const hasUcp = !!(ucpData && ucpData.exists && ucpData.content);
3725
+ const hasShopify = !!(ucpData && ucpData.shopifyHosted);
3726
+ const hasNoInteractiveSurface =
3727
+ totalForms === 0 &&
3728
+ toolCount === 0 &&
3729
+ !hasImperativeSignals &&
3730
+ !webmcpSDKFound &&
3731
+ !hasSchemaActions &&
3732
+ !hasUcp &&
3733
+ !hasShopify;
3734
+
3735
+ if (hasNoInteractiveSurface) {
3736
+ checks.push({
3737
+ status: 'info',
3738
+ label: 'Informational page — Agent Interactivity not applicable',
3739
+ detail: 'No forms or WebMCP/UCP signals detected. Pure-content pages can\'t expose tools to agents, so this category is scored as a baseline rather than penalized.',
3740
+ });
3741
+ return { checks, score: 80, category: 'Agent Interactivity', notApplicable: true };
3742
+ }
3743
+
3665
3744
  return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Agent Interactivity' };
3666
3745
  }
3667
3746
 
@@ -3929,8 +4008,18 @@ function checkContentFreshness($, jsonLdData) {
3929
4008
  new RegExp('\\bin ' + currentYear + '\\b', 'i'),
3930
4009
  new RegExp('\\b(as of|updated)\\s+(january|february|march|april|may|june|july|august|september|october|november|december)\\s+' + currentYear + '\\b', 'i'),
3931
4010
  ];
4011
+ // Historical/founding-context phrases - "records from 1841 to present", "since 1990",
4012
+ // "established 1936" - are accurate facts, not stale temporal references.
4013
+ const HISTORICAL_CONTEXT_PATTERNS = [
4014
+ /\b(since|from|established|founded|operating since|serving since|in business since)\s+(in\s+)?\d{4}\b/i,
4015
+ /\b\d{4}\s*(?:[‐-―−\-–—~]|to)\s*(present|current|today|now|\d{4})\b/i,
4016
+ /\b(records?|archives?|documents?|history|heritage|founded|established|originated|dating back)\b[^.]{0,80}\b(from|since|in)\s+\d{4}\b/i,
4017
+ /\b(historical|historic|vintage|legacy)\b/i,
4018
+ ];
4019
+ const hasHistoricalContext = HISTORICAL_CONTEXT_PATTERNS.some(p => p.test(visibleText));
3932
4020
  const hasCurrentRefs = CURRENT_YEAR_PATTERNS.some(p => p.test(visibleText));
3933
- const hasOutdatedRefs = OUTDATED_TEMPORAL_PATTERNS.some(p => p.test(visibleText));
4021
+ const rawOutdatedHits = OUTDATED_TEMPORAL_PATTERNS.some(p => p.test(visibleText));
4022
+ const hasOutdatedRefs = rawOutdatedHits && !hasHistoricalContext;
3934
4023
  maxScore += 20;
3935
4024
  if (hasCurrentRefs && !hasOutdatedRefs) {
3936
4025
  score += 20;
@@ -3947,13 +4036,16 @@ function checkContentFreshness($, jsonLdData) {
3947
4036
  }
3948
4037
 
3949
4038
  // 12d. Copyright Year & Footer Freshness (10 pts)
4039
+ // Year ranges ("© 1997 - 2026") signal a founding year + current year - take the END
4040
+ // year as the freshness signal, not the founding year.
3950
4041
  const footerEl = $('footer');
3951
4042
  maxScore += 10;
3952
4043
  if (footerEl.length > 0) {
3953
4044
  const footerText = footerEl.text();
3954
- const copyrightMatch = footerText.match(/©\s*(\d{4})/);
3955
- if (copyrightMatch) {
3956
- const copyrightYear = parseInt(copyrightMatch[1], 10);
4045
+ const rangeMatch = footerText.match(/©\s*(\d{4})\s*(?:[‐-―−\-–—~]|to)\s*(\d{4})/);
4046
+ const singleMatch = !rangeMatch ? footerText.match(/©\s*(\d{4})/) : null;
4047
+ if (rangeMatch || singleMatch) {
4048
+ const copyrightYear = parseInt((rangeMatch ? rangeMatch[2] : singleMatch[1]), 10);
3957
4049
  if (copyrightYear === currentYear) {
3958
4050
  score += 10;
3959
4051
  checks.push({ status: 'pass', label: `Copyright year current (${copyrightYear})`, detail: `Footer copyright is ${copyrightYear}` });
@@ -4041,22 +4133,36 @@ function checkInformationDensity($) {
4041
4133
  }
4042
4134
 
4043
4135
  // 13b. Self-Contained Section Scoring (25 pts)
4136
+ // Sections with structured content (tables w/ headers, lists, definition lists) are
4137
+ // self-contained even at lower word counts - the structure carries the meaning.
4044
4138
  const h2s = $('main h2, article h2, [role="main"] h2');
4045
4139
  maxScore += 25;
4046
4140
  if (h2s.length > 0) {
4047
4141
  let selfContainedCount = 0;
4048
4142
  h2s.each((_i, h2El) => {
4049
4143
  let sectionText = '';
4144
+ let hasStructuredContent = false;
4145
+ let hasLabeledTable = false;
4050
4146
  let sibling = $(h2El).next();
4051
4147
  while (sibling.length > 0 && !sibling.is('h2')) {
4052
4148
  sectionText += (sibling.text() || '') + ' ';
4149
+ if (sibling.is('table, ul, ol, dl') || sibling.find('table, ul, ol, dl').length > 0) {
4150
+ hasStructuredContent = true;
4151
+ }
4152
+ const tablesHere = sibling.is('table') ? sibling : sibling.find('table');
4153
+ tablesHere.each((__, t) => {
4154
+ if ($(t).find('th').length > 0) hasLabeledTable = true;
4155
+ });
4053
4156
  sibling = sibling.next();
4054
4157
  }
4055
4158
  const wordCount = sectionText.trim().split(/\s+/).length;
4056
4159
  const hasData = /\d/.test(sectionText);
4057
4160
  const firstSentence = sectionText.split(/[.!?]/)[0] || '';
4058
4161
  const hasTopicSentence = firstSentence.trim().length > 30;
4059
- if (wordCount >= 150 && wordCount <= 500 && hasData && hasTopicSentence) {
4162
+ const isStandardComplete = wordCount >= 150 && wordCount <= 500 && hasData && hasTopicSentence;
4163
+ const isStructurallyComplete = hasStructuredContent && wordCount >= 40 && (hasData || hasLabeledTable);
4164
+ const isLabeledTableSection = hasLabeledTable && wordCount >= 10;
4165
+ if (isStandardComplete || isStructurallyComplete || isLabeledTableSection) {
4060
4166
  selfContainedCount++;
4061
4167
  }
4062
4168
  });
@@ -4076,6 +4182,8 @@ function checkInformationDensity($) {
4076
4182
  }
4077
4183
 
4078
4184
  // 13c. Claim-Evidence Pairing (20 pts)
4185
+ // Tables with header cells provide column-level context for every numeric value,
4186
+ // so data points inside labeled tables are considered already-paired by design.
4079
4187
  const DATA_SENTENCE = /\d+(\.\d+)?(%|x|\$|€|£)/;
4080
4188
  let dataSentences = 0;
4081
4189
  let pairedData = 0;
@@ -4090,14 +4198,26 @@ function checkInformationDensity($) {
4090
4198
  }
4091
4199
  }
4092
4200
  });
4201
+ // Count data cells inside labeled tables - they're context-paired via column headers.
4202
+ let labeledTableDataCells = 0;
4203
+ const pairingTables = mainEl.length > 0 ? mainEl.find('table') : $('table');
4204
+ pairingTables.each((_i, t) => {
4205
+ const $t = $(t);
4206
+ if ($t.find('th').length === 0) return;
4207
+ $t.find('tbody td, td').each((__, td) => {
4208
+ if (DATA_SENTENCE.test($(td).text() || '')) labeledTableDataCells++;
4209
+ });
4210
+ });
4093
4211
  maxScore += 20;
4094
- if (dataSentences === 0) {
4212
+ const totalData = dataSentences + labeledTableDataCells;
4213
+ const totalPaired = pairedData + labeledTableDataCells;
4214
+ if (totalData === 0) {
4095
4215
  checks.push({ status: 'info', label: 'No data claims detected', detail: 'Add quantitative data points with context' });
4096
4216
  } else {
4097
- const pairedPct = Math.round((pairedData / dataSentences) * 100);
4217
+ const pairedPct = Math.round((totalPaired / totalData) * 100);
4098
4218
  if (pairedPct > 80) {
4099
4219
  score += 20;
4100
- checks.push({ status: 'pass', label: `Claims well-paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have contextual explanations` });
4220
+ checks.push({ status: 'pass', label: `Claims well-paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have contextual explanations${labeledTableDataCells > 0 ? ` (incl. ${labeledTableDataCells} table cells)` : ''}` });
4101
4221
  } else if (pairedPct >= 50) {
4102
4222
  score += 10;
4103
4223
  checks.push({ status: 'warn', label: `Claims partially paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have context — add more explanations` });