glippy-mcp 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/geo-checker.js +140 -20
package/package.json
CHANGED
package/src/geo-checker.js
CHANGED
|
@@ -757,10 +757,27 @@ function aggregatePageScores(pageResults) {
|
|
|
757
757
|
* @returns {string} - One of: 'faq', 'product', 'article', 'local-business', 'homepage', 'ecommerce', 'saas', 'generic'.
|
|
758
758
|
*/
|
|
759
759
|
function detectPageType($, schemaTypes, pathname) {
|
|
760
|
-
// Check JSON-LD schema types first (most reliable signal)
|
|
761
|
-
|
|
762
|
-
|
|
760
|
+
// Check JSON-LD schema types first (most reliable signal).
|
|
761
|
+
// A page can carry FAQPage schema for a small FAQ section while being a long-form
|
|
762
|
+
// guide. Only classify as "faq" when FAQPage is the dominant structure - otherwise
|
|
763
|
+
// a 6,400-word guide with a FAQ at the bottom gets penalized as exceeding FAQ length.
|
|
764
|
+
const allH2s = $('h2');
|
|
765
|
+
const h2Count = allH2s.length;
|
|
766
|
+
let questionH2Count = 0;
|
|
767
|
+
allH2s.each((_, el) => {
|
|
768
|
+
const t = ($(el).text() || '').trim();
|
|
769
|
+
if (t.includes('?') || /^(how|what|why|when|where|who|which|can|do|does|is|are|should)\b/i.test(t)) {
|
|
770
|
+
questionH2Count++;
|
|
771
|
+
}
|
|
772
|
+
});
|
|
773
|
+
const isDominantlyFaq = h2Count > 0 && questionH2Count >= h2Count * 0.7;
|
|
774
|
+
|
|
775
|
+
if (schemaTypes.has('FAQPage') && isDominantlyFaq) return 'faq';
|
|
763
776
|
if (['Article', 'NewsArticle', 'BlogPosting', 'TechArticle'].some((t) => schemaTypes.has(t))) return 'article';
|
|
777
|
+
// FAQPage schema present but page also has many topic-style H2s = guide with a FAQ section.
|
|
778
|
+
if (schemaTypes.has('FAQPage') && h2Count >= 6) return 'article';
|
|
779
|
+
if (schemaTypes.has('FAQPage')) return 'faq';
|
|
780
|
+
if (['Product', 'Offer'].some((t) => schemaTypes.has(t))) return 'product';
|
|
764
781
|
if (['LocalBusiness', 'Restaurant', 'Store'].some((t) => schemaTypes.has(t))) return 'local-business';
|
|
765
782
|
|
|
766
783
|
// Heuristic: homepage detection (including language/locale-prefixed homepages like /en/, /de-DE/, /nl/)
|
|
@@ -769,9 +786,10 @@ function detectPageType($, schemaTypes, pathname) {
|
|
|
769
786
|
const normalizedPath = pathname.replace(/^\/[a-z]{2}(?:[-_][a-z]{2,3})?\/?$/i, '/');
|
|
770
787
|
if (normalizedPath === '/' || normalizedPath === '/index.html' || normalizedPath === '/index.php' || normalizedPath === '') return 'homepage';
|
|
771
788
|
|
|
772
|
-
// Heuristic: FAQ page via DOM
|
|
789
|
+
// Heuristic: FAQ page via DOM. Only treat as FAQ when FAQ-like elements dominate the
|
|
790
|
+
// structure - if the page has many topic H2s it's a guide that happens to include a FAQ.
|
|
773
791
|
const faqIndicators = $('[class*="faq"], [id*="faq"], details, [class*="accordion"]');
|
|
774
|
-
if (faqIndicators.length >= 3) return 'faq';
|
|
792
|
+
if (faqIndicators.length >= 3 && (h2Count < 6 || isDominantlyFaq)) return 'faq';
|
|
775
793
|
|
|
776
794
|
// Heuristic: article via DOM
|
|
777
795
|
const hasArticle = $('article').length > 0;
|
|
@@ -2014,7 +2032,29 @@ function checkEntity($, jsonLdData) {
|
|
|
2014
2032
|
});
|
|
2015
2033
|
}
|
|
2016
2034
|
|
|
2017
|
-
// 6. JSON-LD schema author with quality check
|
|
2035
|
+
// 6. JSON-LD schema author with quality check.
|
|
2036
|
+
// Only treat `author` as the page author when it's attached to a content type
|
|
2037
|
+
// (Article, WebPage, Book, etc.) - NOT inside Review/Comment, where `author` is
|
|
2038
|
+
// the reviewer/commenter and shouldn't be credited to the page.
|
|
2039
|
+
const PAGE_AUTHOR_TYPES = new Set([
|
|
2040
|
+
'Article', 'NewsArticle', 'BlogPosting', 'TechArticle', 'ScholarlyArticle', 'Report', 'OpinionNewsArticle',
|
|
2041
|
+
'WebPage', 'AboutPage', 'CollectionPage', 'ItemPage', 'ProfilePage', 'QAPage', 'FAQPage',
|
|
2042
|
+
'Book', 'Chapter', 'CreativeWork', 'CreativeWorkSeries', 'HowTo', 'Recipe', 'Course', 'LearningResource',
|
|
2043
|
+
'VideoObject', 'AudioObject', 'PodcastEpisode', 'Podcast',
|
|
2044
|
+
'DiscussionForumPosting', 'SocialMediaPosting',
|
|
2045
|
+
]);
|
|
2046
|
+
const SKIP_AUTHOR_TYPES = new Set(['Review', 'Comment', 'UserComments', 'Rating']);
|
|
2047
|
+
const isContentType = (t) => {
|
|
2048
|
+
if (!t) return false;
|
|
2049
|
+
const types = Array.isArray(t) ? t : [t];
|
|
2050
|
+
return types.some((x) => PAGE_AUTHOR_TYPES.has(x));
|
|
2051
|
+
};
|
|
2052
|
+
const isSkipType = (t) => {
|
|
2053
|
+
if (!t) return false;
|
|
2054
|
+
const types = Array.isArray(t) ? t : [t];
|
|
2055
|
+
return types.some((x) => SKIP_AUTHOR_TYPES.has(x));
|
|
2056
|
+
};
|
|
2057
|
+
|
|
2018
2058
|
let hasAuthorSchema = false;
|
|
2019
2059
|
let hasAuthorSameAs = false;
|
|
2020
2060
|
let hasPersonSchema = false;
|
|
@@ -2022,12 +2062,14 @@ function checkEntity($, jsonLdData) {
|
|
|
2022
2062
|
try {
|
|
2023
2063
|
const processSchema = (schema) => {
|
|
2024
2064
|
if (!schema) return;
|
|
2025
|
-
if (schema.author) {
|
2065
|
+
// Skip Review/Comment subtrees - their author is not the page author.
|
|
2066
|
+
if (isSkipType(schema['@type'])) return;
|
|
2067
|
+
if (schema.author && isContentType(schema['@type'])) {
|
|
2026
2068
|
hasAuthorSchema = true;
|
|
2027
2069
|
const authors = Array.isArray(schema.author) ? schema.author : [schema.author];
|
|
2028
2070
|
authors.forEach((a) => {
|
|
2029
2071
|
if (typeof a === 'string') authorNames.add(a);
|
|
2030
|
-
else if (a.name) {
|
|
2072
|
+
else if (a && a.name) {
|
|
2031
2073
|
authorNames.add(a.name);
|
|
2032
2074
|
if (a.sameAs) hasAuthorSameAs = true;
|
|
2033
2075
|
if (a['@type'] === 'Person') hasPersonSchema = true;
|
|
@@ -2038,6 +2080,13 @@ function checkEntity($, jsonLdData) {
|
|
|
2038
2080
|
hasPersonSchema = true;
|
|
2039
2081
|
if (schema.sameAs) hasAuthorSameAs = true;
|
|
2040
2082
|
}
|
|
2083
|
+
// Recurse into common content-bearing fields, but skip review arrays.
|
|
2084
|
+
['mainEntity', 'mainEntityOfPage', 'about', 'isPartOf', 'hasPart', 'workExample', 'exampleOfWork'].forEach((key) => {
|
|
2085
|
+
const val = schema[key];
|
|
2086
|
+
if (!val) return;
|
|
2087
|
+
if (Array.isArray(val)) val.forEach(processSchema);
|
|
2088
|
+
else if (typeof val === 'object') processSchema(val);
|
|
2089
|
+
});
|
|
2041
2090
|
};
|
|
2042
2091
|
if (Array.isArray(d)) d.forEach(processSchema);
|
|
2043
2092
|
else if (d['@graph']) d['@graph'].forEach(processSchema);
|
|
@@ -2047,14 +2096,17 @@ function checkEntity($, jsonLdData) {
|
|
|
2047
2096
|
if (hasAuthorSchema) authorSources.schema.push('JSON-LD author');
|
|
2048
2097
|
if (hasPersonSchema) authorSources.schema.push('Person schema');
|
|
2049
2098
|
|
|
2050
|
-
// 7. HTML byline elements - extended selectors
|
|
2099
|
+
// 7. HTML byline elements - extended selectors.
|
|
2100
|
+
// Exclude bylines inside review/comment/testimonial containers - they identify the
|
|
2101
|
+
// reviewer, not the page author.
|
|
2051
2102
|
const bylineSelectors = [
|
|
2052
2103
|
'[class*="author"]', '[rel="author"]', '[itemprop="author"]',
|
|
2053
2104
|
'.byline', '.post-author', '.article-author', '.entry-author',
|
|
2054
2105
|
'[data-author]', '[data-byline]',
|
|
2055
2106
|
'address.author', '.writer', '.contributor',
|
|
2056
2107
|
].join(', ');
|
|
2057
|
-
const authorByline = $(bylineSelectors).first();
|
|
2108
|
+
const reviewContextSel = '[itemtype*="Review"], [itemtype*="Comment"], .review, .reviews, .comment, .comments, .testimonial, .testimonials, [class*="review-"], [class*="reviews-"]';
|
|
2109
|
+
const authorByline = $(bylineSelectors).filter((_, el) => $(el).closest(reviewContextSel).length === 0).first();
|
|
2058
2110
|
if (authorByline.length > 0) {
|
|
2059
2111
|
const bylineText = (authorByline.text() || '').trim();
|
|
2060
2112
|
if (bylineText && bylineText.length < 100) {
|
|
@@ -2070,8 +2122,9 @@ function checkEntity($, jsonLdData) {
|
|
|
2070
2122
|
authorSources.html.push('address element');
|
|
2071
2123
|
}
|
|
2072
2124
|
|
|
2073
|
-
// 9. Author profile links
|
|
2074
|
-
const authorLinks = $('a[href*="/author/"], a[href*="/writers/"], a[href*="/contributors/"], a[href*="/team/"], a[rel="author"]')
|
|
2125
|
+
// 9. Author profile links - skip review-context links (reviewer profile links).
|
|
2126
|
+
const authorLinks = $('a[href*="/author/"], a[href*="/writers/"], a[href*="/contributors/"], a[href*="/team/"], a[rel="author"]')
|
|
2127
|
+
.filter((_, el) => $(el).closest(reviewContextSel).length === 0);
|
|
2075
2128
|
if (authorLinks.length > 0) {
|
|
2076
2129
|
authorSources.links.push(`${authorLinks.length} author link(s)`);
|
|
2077
2130
|
authorLinks.each((_, el) => {
|
|
@@ -3662,6 +3715,32 @@ function checkWebMCP($, pageType, ucpData) {
|
|
|
3662
3715
|
checks.push({ status: 'info', label: 'Shopify-hosted: dual UCP surface expected', detail: 'Per-shop endpoint at /api/ucp/mcp; global catalog at https://discover.shopifyapps.com/global/mcp' });
|
|
3663
3716
|
}
|
|
3664
3717
|
|
|
3718
|
+
// Baseline credit for purely informational pages.
|
|
3719
|
+
// If the page has no forms, no WebMCP signals, no UCP profile, and no Shopify
|
|
3720
|
+
// surface, there's nothing for it to expose to agents - WebMCP/UCP are N/A here.
|
|
3721
|
+
// Without this, content-only pages are capped well below 100 even when there's
|
|
3722
|
+
// nothing to fix, dragging the overall score unfairly.
|
|
3723
|
+
const totalForms = $('form').length;
|
|
3724
|
+
const hasUcp = !!(ucpData && ucpData.exists && ucpData.content);
|
|
3725
|
+
const hasShopify = !!(ucpData && ucpData.shopifyHosted);
|
|
3726
|
+
const hasNoInteractiveSurface =
|
|
3727
|
+
totalForms === 0 &&
|
|
3728
|
+
toolCount === 0 &&
|
|
3729
|
+
!hasImperativeSignals &&
|
|
3730
|
+
!webmcpSDKFound &&
|
|
3731
|
+
!hasSchemaActions &&
|
|
3732
|
+
!hasUcp &&
|
|
3733
|
+
!hasShopify;
|
|
3734
|
+
|
|
3735
|
+
if (hasNoInteractiveSurface) {
|
|
3736
|
+
checks.push({
|
|
3737
|
+
status: 'info',
|
|
3738
|
+
label: 'Informational page — Agent Interactivity not applicable',
|
|
3739
|
+
detail: 'No forms or WebMCP/UCP signals detected. Pure-content pages can\'t expose tools to agents, so this category is scored as a baseline rather than penalized.',
|
|
3740
|
+
});
|
|
3741
|
+
return { checks, score: 80, category: 'Agent Interactivity', notApplicable: true };
|
|
3742
|
+
}
|
|
3743
|
+
|
|
3665
3744
|
return { checks, score: maxScore > 0 ? Math.round((score / maxScore) * 100) : 0, category: 'Agent Interactivity' };
|
|
3666
3745
|
}
|
|
3667
3746
|
|
|
@@ -3929,8 +4008,18 @@ function checkContentFreshness($, jsonLdData) {
|
|
|
3929
4008
|
new RegExp('\\bin ' + currentYear + '\\b', 'i'),
|
|
3930
4009
|
new RegExp('\\b(as of|updated)\\s+(january|february|march|april|may|june|july|august|september|october|november|december)\\s+' + currentYear + '\\b', 'i'),
|
|
3931
4010
|
];
|
|
4011
|
+
// Historical/founding-context phrases - "records from 1841 to present", "since 1990",
|
|
4012
|
+
// "established 1936" - are accurate facts, not stale temporal references.
|
|
4013
|
+
const HISTORICAL_CONTEXT_PATTERNS = [
|
|
4014
|
+
/\b(since|from|established|founded|operating since|serving since|in business since)\s+(in\s+)?\d{4}\b/i,
|
|
4015
|
+
/\b\d{4}\s*(?:[‐-―−\-–—~]|to)\s*(present|current|today|now|\d{4})\b/i,
|
|
4016
|
+
/\b(records?|archives?|documents?|history|heritage|founded|established|originated|dating back)\b[^.]{0,80}\b(from|since|in)\s+\d{4}\b/i,
|
|
4017
|
+
/\b(historical|historic|vintage|legacy)\b/i,
|
|
4018
|
+
];
|
|
4019
|
+
const hasHistoricalContext = HISTORICAL_CONTEXT_PATTERNS.some(p => p.test(visibleText));
|
|
3932
4020
|
const hasCurrentRefs = CURRENT_YEAR_PATTERNS.some(p => p.test(visibleText));
|
|
3933
|
-
const hasOutdatedRefs = OUTDATED_TEMPORAL_PATTERNS.some(p => p.test(visibleText));
|
|
4021
|
+
const rawOutdatedHits = OUTDATED_TEMPORAL_PATTERNS.some(p => p.test(visibleText));
|
|
4022
|
+
const hasOutdatedRefs = rawOutdatedHits && !hasHistoricalContext;
|
|
3934
4023
|
maxScore += 20;
|
|
3935
4024
|
if (hasCurrentRefs && !hasOutdatedRefs) {
|
|
3936
4025
|
score += 20;
|
|
@@ -3947,13 +4036,16 @@ function checkContentFreshness($, jsonLdData) {
|
|
|
3947
4036
|
}
|
|
3948
4037
|
|
|
3949
4038
|
// 12d. Copyright Year & Footer Freshness (10 pts)
|
|
4039
|
+
// Year ranges ("© 1997 - 2026") signal a founding year + current year - take the END
|
|
4040
|
+
// year as the freshness signal, not the founding year.
|
|
3950
4041
|
const footerEl = $('footer');
|
|
3951
4042
|
maxScore += 10;
|
|
3952
4043
|
if (footerEl.length > 0) {
|
|
3953
4044
|
const footerText = footerEl.text();
|
|
3954
|
-
const
|
|
3955
|
-
|
|
3956
|
-
|
|
4045
|
+
const rangeMatch = footerText.match(/©\s*(\d{4})\s*(?:[‐-―−\-–—~]|to)\s*(\d{4})/);
|
|
4046
|
+
const singleMatch = !rangeMatch ? footerText.match(/©\s*(\d{4})/) : null;
|
|
4047
|
+
if (rangeMatch || singleMatch) {
|
|
4048
|
+
const copyrightYear = parseInt((rangeMatch ? rangeMatch[2] : singleMatch[1]), 10);
|
|
3957
4049
|
if (copyrightYear === currentYear) {
|
|
3958
4050
|
score += 10;
|
|
3959
4051
|
checks.push({ status: 'pass', label: `Copyright year current (${copyrightYear})`, detail: `Footer copyright is ${copyrightYear}` });
|
|
@@ -4041,22 +4133,36 @@ function checkInformationDensity($) {
|
|
|
4041
4133
|
}
|
|
4042
4134
|
|
|
4043
4135
|
// 13b. Self-Contained Section Scoring (25 pts)
|
|
4136
|
+
// Sections with structured content (tables w/ headers, lists, definition lists) are
|
|
4137
|
+
// self-contained even at lower word counts - the structure carries the meaning.
|
|
4044
4138
|
const h2s = $('main h2, article h2, [role="main"] h2');
|
|
4045
4139
|
maxScore += 25;
|
|
4046
4140
|
if (h2s.length > 0) {
|
|
4047
4141
|
let selfContainedCount = 0;
|
|
4048
4142
|
h2s.each((_i, h2El) => {
|
|
4049
4143
|
let sectionText = '';
|
|
4144
|
+
let hasStructuredContent = false;
|
|
4145
|
+
let hasLabeledTable = false;
|
|
4050
4146
|
let sibling = $(h2El).next();
|
|
4051
4147
|
while (sibling.length > 0 && !sibling.is('h2')) {
|
|
4052
4148
|
sectionText += (sibling.text() || '') + ' ';
|
|
4149
|
+
if (sibling.is('table, ul, ol, dl') || sibling.find('table, ul, ol, dl').length > 0) {
|
|
4150
|
+
hasStructuredContent = true;
|
|
4151
|
+
}
|
|
4152
|
+
const tablesHere = sibling.is('table') ? sibling : sibling.find('table');
|
|
4153
|
+
tablesHere.each((__, t) => {
|
|
4154
|
+
if ($(t).find('th').length > 0) hasLabeledTable = true;
|
|
4155
|
+
});
|
|
4053
4156
|
sibling = sibling.next();
|
|
4054
4157
|
}
|
|
4055
4158
|
const wordCount = sectionText.trim().split(/\s+/).length;
|
|
4056
4159
|
const hasData = /\d/.test(sectionText);
|
|
4057
4160
|
const firstSentence = sectionText.split(/[.!?]/)[0] || '';
|
|
4058
4161
|
const hasTopicSentence = firstSentence.trim().length > 30;
|
|
4059
|
-
if (wordCount >= 150 && wordCount <= 500 && hasData && hasTopicSentence) {
|
4162
|
+
const isStandardComplete = wordCount >= 150 && wordCount <= 500 && hasData && hasTopicSentence;
|
|
4163
|
+
const isStructurallyComplete = hasStructuredContent && wordCount >= 40 && (hasData || hasLabeledTable);
|
|
4164
|
+
const isLabeledTableSection = hasLabeledTable && wordCount >= 10;
|
|
4165
|
+
if (isStandardComplete || isStructurallyComplete || isLabeledTableSection) {
|
|
4060
4166
|
selfContainedCount++;
|
|
4061
4167
|
}
|
|
4062
4168
|
});
|
|
@@ -4076,6 +4182,8 @@ function checkInformationDensity($) {
|
|
|
4076
4182
|
}
|
|
4077
4183
|
|
|
4078
4184
|
// 13c. Claim-Evidence Pairing (20 pts)
|
|
4185
|
+
// Tables with header cells provide column-level context for every numeric value,
|
|
4186
|
+
// so data points inside labeled tables are considered already-paired by design.
|
|
4079
4187
|
const DATA_SENTENCE = /\d+(\.\d+)?(%|x|\$|€|£)/;
|
|
4080
4188
|
let dataSentences = 0;
|
|
4081
4189
|
let pairedData = 0;
|
|
@@ -4090,14 +4198,26 @@ function checkInformationDensity($) {
|
|
|
4090
4198
|
}
|
|
4091
4199
|
}
|
|
4092
4200
|
});
|
|
4201
|
+
// Count data cells inside labeled tables - they're context-paired via column headers.
|
|
4202
|
+
let labeledTableDataCells = 0;
|
|
4203
|
+
const pairingTables = mainEl.length > 0 ? mainEl.find('table') : $('table');
|
|
4204
|
+
pairingTables.each((_i, t) => {
|
|
4205
|
+
const $t = $(t);
|
|
4206
|
+
if ($t.find('th').length === 0) return;
|
|
4207
|
+
$t.find('tbody td, td').each((__, td) => {
|
|
4208
|
+
if (DATA_SENTENCE.test($(td).text() || '')) labeledTableDataCells++;
|
|
4209
|
+
});
|
|
4210
|
+
});
|
|
4093
4211
|
maxScore += 20;
|
|
4094
|
-
|
|
4212
|
+
const totalData = dataSentences + labeledTableDataCells;
|
|
4213
|
+
const totalPaired = pairedData + labeledTableDataCells;
|
|
4214
|
+
if (totalData === 0) {
|
|
4095
4215
|
checks.push({ status: 'info', label: 'No data claims detected', detail: 'Add quantitative data points with context' });
|
|
4096
4216
|
} else {
|
|
4097
|
-
const pairedPct = Math.round((pairedData / dataSentences) * 100);
|
|
4217
|
+
const pairedPct = Math.round((totalPaired / totalData) * 100);
|
|
4098
4218
|
if (pairedPct > 80) {
|
|
4099
4219
|
score += 20;
|
|
4100
|
-
checks.push({ status: 'pass', label: `Claims well-paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have contextual explanations` });
|
|
4220
|
+
checks.push({ status: 'pass', label: `Claims well-paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have contextual explanations${labeledTableDataCells > 0 ? ` (incl. ${labeledTableDataCells} table cells)` : ''}` });
|
|
4101
4221
|
} else if (pairedPct >= 50) {
|
|
4102
4222
|
score += 10;
|
|
4103
4223
|
checks.push({ status: 'warn', label: `Claims partially paired (${pairedPct}%)`, detail: `${pairedPct}% of data claims have context — add more explanations` });
|