pagesight 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ /**
2
+ * robots.txt parser per RFC 9309
3
+ * https://www.rfc-editor.org/rfc/rfc9309
4
+ *
5
+ * AI crawler registry sourced from:
6
+ * https://github.com/ai-robots-txt/ai.robots.txt (robots.json)
7
+ */
8
+
9
/**
 * One robots.txt group: the user-agent lines that open it and the
 * allow/disallow rules that follow them.
 */
export interface RobotsGroup {
  /** User-agent values exactly as written in the file (not lowercased). */
  userAgents: string[];
  /** Allow/disallow rules in file order. */
  rules: { type: "allow" | "disallow"; path: string }[];
}
13
+
14
/** Result of parsing a robots.txt document. */
export interface RobotsTxt {
  /** Groups in the order they appear in the file. */
  groups: RobotsGroup[];
  /** Values of every non-empty `Sitemap:` line. */
  sitemaps: string[];
  /** The unparsed robots.txt text. */
  raw: string;
  /** Human-readable parse problems, each prefixed with its line number. */
  errors: string[];
}
20
+
21
/** Audit verdict for a single crawler from the AI crawler registry. */
export interface CrawlerStatus {
  /** Crawler token (its robots.txt user-agent name). */
  name: string;
  /** Operator of the crawler. */
  company: string;
  /** What the crawler is used for, as reported by the registry. */
  category: string;
  /** "yes", "no" or "unclear". */
  respectsRobotsTxt: string;
  /** Short description of the crawler. */
  description: string;
  /** Whether the crawler may fetch the audited path. */
  allowed: boolean;
  /** The rule that decided the verdict, if any rule matched. */
  matchedRule?: {
    type: "allow" | "disallow";
    path: string;
  } | null;
  /** User-agent value of the group the verdict came from. */
  matchedGroup?: string;
}
31
+
32
/** One entry of the remote AI crawler registry, keyed by its token. */
export interface CrawlerInfo {
  /** Registry key for the crawler (used as its user-agent name). */
  token: string;
  /** Operator name with markdown links removed. */
  operator: string;
  /** Raw "respects robots.txt" text from the registry. */
  respect: string;
  /** Stated purpose of the crawler. */
  function: string;
  /** Description with markdown links removed. */
  description: string;
}
39
+
40
+ // --- Remote Registry ---
41
+
42
/** Location of the AI crawler registry (robots.json). */
const REGISTRY_URL = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.json";

/** How long a fetched registry is considered fresh: 24 hours, in milliseconds. */
const CACHE_TTL_MS = 1000 * 60 * 60 * 24;

/** Last successfully fetched registry, or null when nothing has been fetched yet. */
let cachedRegistry: CrawlerInfo[] | null = null;

/** `Date.now()` value recorded when `cachedRegistry` was last filled; 0 until then. */
let cacheTimestamp = 0;
47
+
48
/**
 * Replaces every inline markdown link `[label](target)` with just its label.
 *
 * @param text Text that may contain markdown links.
 * @returns The text with link targets removed.
 */
function stripMarkdownLinks(text: string): string {
  const inlineLink = /\[([^\]]+)\]\([^)]+\)/g;
  return text.replace(inlineLink, (_whole: string, label: string) => label);
}
51
+
52
/**
 * Reduces the registry's free-form "respects robots.txt" text to a verdict.
 *
 * Markdown links are stripped and the text is trimmed and lowercased before
 * it is inspected.
 *
 * @param raw Registry text.
 * @returns "yes" or "no" when the cleaned text starts with that word,
 *   otherwise "unclear".
 */
function parseRespect(raw: string): string {
  const normalized = stripMarkdownLinks(raw).trim().toLowerCase();
  for (const verdict of ["yes", "no"]) {
    if (normalized.startsWith(verdict)) return verdict;
  }
  return "unclear";
}
58
+
59
/**
 * Loads the AI crawler registry, serving it from the in-memory cache while
 * the cached copy is younger than CACHE_TTL_MS.
 *
 * Never rejects: on any failure (network error, timeout, non-2xx status,
 * malformed payload) the previously cached registry is returned, or an empty
 * list when nothing has been cached yet.
 *
 * @returns The registry entries.
 */
async function fetchRegistry(): Promise<CrawlerInfo[]> {
  if (cachedRegistry && Date.now() - cacheTimestamp < CACHE_TTL_MS) {
    return cachedRegistry;
  }

  try {
    const response = await fetch(REGISTRY_URL, {
      headers: { "User-Agent": "Pagesight/0.1" },
      signal: AbortSignal.timeout(10_000),
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const payload = (await response.json()) as Record<string, Record<string, string>>;

    // Build the full list first so a failure halfway leaves the cache untouched.
    const entries: CrawlerInfo[] = [];
    for (const [token, info] of Object.entries(payload)) {
      entries.push({
        token,
        operator: stripMarkdownLinks(info.operator ?? "Unknown"),
        respect: info.respect ?? "Unclear",
        function: info.function ?? "Unknown",
        description: stripMarkdownLinks(info.description ?? ""),
      });
    }

    cachedRegistry = entries;
    cacheTimestamp = Date.now();
    return entries;
  } catch {
    // Best effort: a stale registry (or none) is preferred over failing the caller.
    return cachedRegistry ?? [];
  }
}
89
+
90
+ // --- Parser ---
91
+
92
/**
 * Parses robots.txt text into groups, sitemap URLs and a list of problems.
 *
 * Parsing is lenient: malformed or unknown lines are recorded in `errors`
 * and skipped; nothing is thrown.
 *
 * Grouping: consecutive `User-agent` lines share one group; a `User-agent`
 * line that follows at least one allow/disallow rule starts a new group.
 * `Crawl-delay` and `Host` are recognised but ignored.
 *
 * @param raw The robots.txt body.
 * @returns Parsed groups and sitemaps, the original text, and parse errors
 *   (each prefixed with its 1-based line number).
 */
export function parseRobotsTxt(raw: string): RobotsTxt {
  const errors: string[] = [];
  const groups: RobotsGroup[] = [];
  const sitemaps: string[] = [];

  let currentGroup: RobotsGroup | null = null;

  // RFC 9309 defines EOL as CR / LF / CRLF. "\r\n" is listed first so a CRLF
  // pair counts as a single line break and line numbers stay correct.
  const lines = raw.split(/\r\n|\r|\n/);

  for (let i = 0; i < lines.length; i++) {
    const lineNum = i + 1;
    let line = lines[i];

    // Strip comments
    const commentIdx = line.indexOf("#");
    if (commentIdx !== -1) line = line.substring(0, commentIdx);
    line = line.trim();

    if (!line) continue;

    // Only the first colon separates directive from value, so URL values
    // (which contain colons) survive intact.
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) {
      errors.push(`Line ${lineNum}: Missing colon — "${line}"`);
      continue;
    }

    const directive = line.substring(0, colonIdx).trim().toLowerCase();
    const value = line.substring(colonIdx + 1).trim();

    if (directive === "user-agent") {
      if (!value) {
        errors.push(`Line ${lineNum}: Empty user-agent value`);
        continue;
      }
      if (!currentGroup || currentGroup.rules.length > 0) {
        // First agent of the file, or the previous group already has rules.
        currentGroup = { userAgents: [value], rules: [] };
        groups.push(currentGroup);
      } else {
        // Another agent for the group that is still being opened.
        currentGroup.userAgents.push(value);
      }
    } else if (directive === "disallow") {
      if (!currentGroup) {
        errors.push(`Line ${lineNum}: Disallow before any User-agent`);
        continue;
      }
      currentGroup.rules.push({ type: "disallow", path: value });
    } else if (directive === "allow") {
      if (!currentGroup) {
        errors.push(`Line ${lineNum}: Allow before any User-agent`);
        continue;
      }
      currentGroup.rules.push({ type: "allow", path: value });
    } else if (directive === "sitemap") {
      if (value) sitemaps.push(value);
      else errors.push(`Line ${lineNum}: Empty sitemap URL`);
    } else if (directive === "crawl-delay" || directive === "host") {
      // Known non-standard directives — ignore silently
    } else {
      errors.push(`Line ${lineNum}: Unknown directive "${directive}"`);
    }
  }

  return { groups, sitemaps, raw, errors };
}
156
+
157
+ // --- Matching (per RFC 9309) ---
158
+
159
/**
 * Tests a URL path against a robots.txt rule pattern.
 *
 * Pattern syntax: `*` matches any run of characters (including none), a `$`
 * as the very last character anchors the match to the end of the path, and
 * every other character (including a `$` elsewhere) is literal. Without a
 * trailing `$` the pattern is a prefix match. An empty pattern matches
 * nothing.
 *
 * Implemented as a left-to-right scan over the literal segments between
 * wildcards instead of a generated regular expression: patterns come from
 * remote robots.txt files, and a regex with many `.*` parts can backtrack
 * catastrophically on a crafted pattern.
 *
 * @param pattern Rule path from an allow/disallow line.
 * @param path Path to test.
 * @returns True when the path matches the pattern.
 */
function pathMatches(pattern: string, path: string): boolean {
  if (!pattern) return false;

  const anchored = pattern.endsWith("$");
  const body = anchored ? pattern.slice(0, -1) : pattern;
  const segments = body.split("*");

  // The text before the first wildcard must be a literal prefix of the path.
  const head = segments[0] ?? "";
  if (!path.startsWith(head)) return false;
  if (segments.length === 1) {
    return anchored ? path.length === head.length : true;
  }

  // Middle segments: taking the leftmost occurrence each time leaves the
  // most room for the segments that follow, so no backtracking is needed.
  let pos = head.length;
  const lastIdx = segments.length - 1;
  for (let i = 1; i < lastIdx; i++) {
    const segment = segments[i] ?? "";
    const found = path.indexOf(segment, pos);
    if (found === -1) return false;
    pos = found + segment.length;
  }

  const tail = segments[lastIdx] ?? "";
  if (!anchored) return path.indexOf(tail, pos) !== -1;
  // Anchored: the tail must sit at the very end without overlapping what
  // earlier segments already consumed.
  return path.length - tail.length >= pos && path.endsWith(tail);
}
181
+
182
/**
 * Decides whether a crawler may fetch a path according to parsed robots.txt.
 *
 * Group selection: every group that names the user-agent (case-insensitive,
 * exact match) contributes its rules; RFC 9309 section 2.2.1 requires rules
 * of all matching groups to be combined. Only when no group names the agent
 * are the `*` groups used, again combined.
 *
 * Rule selection: among the rules whose pattern matches the path, the one
 * with the longest pattern wins; on equal length an allow rule beats a
 * disallow rule. With no matching group or no matching rule the path is
 * allowed.
 *
 * @param robots Parsed robots.txt.
 * @param userAgent Crawler token to evaluate.
 * @param path URL path to test.
 * @returns The verdict, the deciding rule (or null), and the user-agent
 *   value of the first group that supplied rules (or null when no group
 *   applied).
 */
export function isAllowed(
  robots: RobotsTxt,
  userAgent: string,
  path: string,
): {
  allowed: boolean;
  matchedRule: { type: "allow" | "disallow"; path: string } | null;
  matchedGroup: string | null;
} {
  const ua = userAgent.toLowerCase();

  let matchedGroupName: string | null = null;
  const rules: RobotsGroup["rules"] = [];

  // Collect the rules of every group that names this agent, not just the first.
  for (const group of robots.groups) {
    const agent = group.userAgents.find((a) => a.toLowerCase() === ua);
    if (agent === undefined) continue;
    if (matchedGroupName === null) matchedGroupName = agent;
    rules.push(...group.rules);
  }

  // Fall back to the wildcard groups only when no group named the agent.
  if (matchedGroupName === null) {
    for (const group of robots.groups) {
      if (!group.userAgents.includes("*")) continue;
      matchedGroupName = "*";
      rules.push(...group.rules);
    }
  }

  if (matchedGroupName === null) return { allowed: true, matchedRule: null, matchedGroup: null };

  let bestRule: { type: "allow" | "disallow"; path: string } | null = null;
  let bestLength = -1;

  for (const rule of rules) {
    if (pathMatches(rule.path, path)) {
      const ruleLength = rule.path.length;
      // Longest pattern wins; allow wins a tie.
      if (ruleLength > bestLength || (ruleLength === bestLength && rule.type === "allow")) {
        bestRule = rule;
        bestLength = ruleLength;
      }
    }
  }

  if (!bestRule) return { allowed: true, matchedRule: null, matchedGroup: matchedGroupName };

  return {
    allowed: bestRule.type === "allow",
    matchedRule: bestRule,
    matchedGroup: matchedGroupName,
  };
}
240
+
241
+ // --- AI Crawler Audit ---
242
+
243
/**
 * Reports, for every crawler in the AI crawler registry, whether the given
 * robots.txt lets it fetch the site root ("/").
 *
 * @param robots Parsed robots.txt to audit.
 * @returns One status per registry entry, in registry order. Descriptions
 *   are cut to 120 characters. The list is empty when the registry could not
 *   be loaded and nothing is cached.
 */
export async function auditAiCrawlers(robots: RobotsTxt): Promise<CrawlerStatus[]> {
  const statuses: CrawlerStatus[] = [];

  for (const entry of await fetchRegistry()) {
    const { allowed, matchedRule, matchedGroup } = isAllowed(robots, entry.token, "/");
    statuses.push({
      name: entry.token,
      company: entry.operator,
      category: entry.function,
      respectsRobotsTxt: parseRespect(entry.respect),
      description: entry.description.slice(0, 120),
      allowed,
      matchedRule,
      // CrawlerStatus uses an absent value rather than null here.
      matchedGroup: matchedGroup ?? undefined,
    });
  }

  return statuses;
}
260
+
261
+ // --- Fetch ---
262
+
263
/**
 * Downloads and parses `/robots.txt` for an origin.
 *
 * Redirects are followed. Any response with status 400 or above yields an
 * empty rule set (no groups, no sitemaps); the status code is returned so
 * callers can tell a missing file from a server failure.
 *
 * The request is aborted after 10 seconds so an unresponsive host cannot
 * stall the caller indefinitely.
 *
 * @param origin Site origin, e.g. "https://example.com".
 * @returns The parsed robots.txt and the HTTP status of the final response.
 * @throws If `origin` is not a valid URL base, or the request fails or
 *   times out.
 */
export async function fetchRobotsTxt(origin: string): Promise<{ robotsTxt: RobotsTxt; statusCode: number }> {
  const url = new URL("/robots.txt", origin).href;

  const res = await fetch(url, {
    headers: { "User-Agent": "Pagesight/0.1" },
    redirect: "follow",
    signal: AbortSignal.timeout(10_000),
  });

  if (res.status >= 400) {
    return { robotsTxt: { groups: [], sitemaps: [], raw: "", errors: [] }, statusCode: res.status };
  }

  const raw = await res.text();
  return { robotsTxt: parseRobotsTxt(raw), statusCode: res.status };
}
278
+
279
+ export { fetchRegistry as loadCrawlerRegistry };
@@ -0,0 +1,288 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { z } from "zod";
3
+ import {
4
+ type CruxFormFactor,
5
+ type CruxHistoryResponse,
6
+ type CruxResponse,
7
+ queryCrux,
8
+ queryCruxHistory,
9
+ } from "../lib/crux.js";
10
+
11
/**
 * Renders a year/month/day triple as `YYYY-MM-DD`, zero-padding month and
 * day to two digits. The year is printed as-is.
 */
function formatDate(d: { year: number; month: number; day: number }): string {
  const twoDigits = (part: number): string => String(part).padStart(2, "0");
  return [String(d.year), twoDigits(d.month), twoDigits(d.day)].join("-");
}
14
+
15
/**
 * Display labels for CrUX metric keys. Keys missing from this table are
 * shown under their raw key name by the formatters.
 */
const METRIC_LABELS: Record<string, string> = {
  // Percentile metrics
  cumulative_layout_shift: "CLS",
  first_contentful_paint: "FCP",
  interaction_to_next_paint: "INP",
  largest_contentful_paint: "LCP",
  experimental_time_to_first_byte: "TTFB",
  round_trip_time: "RTT",
  // Fraction metrics
  navigation_types: "Navigation Types",
  form_factors: "Form Factors",
};
25
+
26
/**
 * Renders a CrUX record as a plain-text report.
 *
 * The report has a header (target, form factor, collection period), an
 * optional URL-normalization note when the API changed the URL, and one
 * entry per metric: the p75 value (with its histogram shares when present)
 * for percentile metrics, or the list of fractions for fraction metrics.
 *
 * @param target URL or origin shown in the header.
 * @param result CrUX API response.
 * @returns Report lines joined with newlines.
 */
function formatCrux(target: string, result: CruxResponse): string {
  const { record, urlNormalizationDetails } = result;
  const { firstDate, lastDate } = record.collectionPeriod;

  const out: string[] = [];
  out.push(`=== CrUX: ${target} ===`);
  out.push(`Form factor: ${record.key.formFactor ?? "all"}`);
  out.push(`Period: ${formatDate(firstDate)} to ${formatDate(lastDate)}`);
  out.push("");

  // Mention normalization only when it actually changed the URL.
  if (urlNormalizationDetails && urlNormalizationDetails.originalUrl !== urlNormalizationDetails.normalizedUrl) {
    out.push(`Normalized: ${urlNormalizationDetails.originalUrl} → ${urlNormalizationDetails.normalizedUrl}`, "");
  }

  out.push("--- Metrics (p75) ---", "");

  for (const [key, metric] of Object.entries(record.metrics)) {
    const label = METRIC_LABELS[key] ?? key;

    if (metric.percentiles) {
      // Every percentile metric except CLS is printed with an "ms" suffix.
      const suffix = key === "cumulative_layout_shift" ? "" : "ms";
      out.push(`${label}: ${metric.percentiles.p75}${suffix}`);

      if (metric.histogram) {
        const shares = metric.histogram.map((bin) => `${Math.round(bin.density * 100)}%`);
        out.push(` Distribution (good/needs improvement/poor): ${shares.join(" / ")}`);
      }
      continue;
    }

    if (metric.fractions) {
      out.push(`${label}:`);
      for (const [name, share] of Object.entries(metric.fractions)) {
        out.push(` ${name}: ${(share * 100).toFixed(1)}%`);
      }
    }
  }

  return out.join("\n");
}
67
+
68
/**
 * Describes the change between the first and last p75 value of a series.
 *
 * A percentage is only defined for a non-zero baseline, so a zero baseline
 * (realistic for CLS) is reported without one instead of as Infinity/NaN.
 */
function describeP75Trend(first: number, last: number): string {
  if (first === 0) {
    if (last === 0) return " Trend: 0.0% (stable)";
    return ` Trend: n/a — baseline is 0 (${last > 0 ? "worse" : "improved"})`;
  }
  const change = ((last - first) / first) * 100;
  // Changes within ±5% count as stable; a higher p75 is worse.
  const dir = change > 5 ? "worse" : change < -5 ? "improved" : "stable";
  return ` Trend: ${change > 0 ? "+" : ""}${change.toFixed(1)}% (${dir})`;
}

/**
 * Renders a CrUX History record as a plain-text trend report.
 *
 * The report has a header (target, form factor, number of periods and their
 * date range), an optional URL-normalization note, one first → last line
 * per metric, and — when at least five periods and one core metric
 * (LCP/INP/CLS) are present — a table of the last five data points.
 *
 * Empty period lists and empty series are reported as missing data rather
 * than throwing or printing `undefined`.
 *
 * @param target URL or origin shown in the header.
 * @param result CrUX History API response.
 * @returns Report lines joined with newlines.
 */
function formatCruxHistory(target: string, result: CruxHistoryResponse): string {
  const r = result.record;
  const periods = r.collectionPeriods;

  // With no periods there is no date range to print.
  const firstPeriod = periods[0];
  const lastPeriod = periods[periods.length - 1];
  const range =
    firstPeriod && lastPeriod ? ` (${formatDate(firstPeriod.firstDate)} to ${formatDate(lastPeriod.lastDate)})` : "";

  const lines: string[] = [
    `=== CrUX History: ${target} ===`,
    `Form factor: ${r.key.formFactor ?? "all"}`,
    `Periods: ${periods.length}${range}`,
    "",
  ];

  if (result.urlNormalizationDetails) {
    const norm = result.urlNormalizationDetails;
    if (norm.originalUrl !== norm.normalizedUrl) {
      lines.push(`Normalized: ${norm.originalUrl} → ${norm.normalizedUrl}`, "");
    }
  }

  lines.push("--- p75 Trend ---", "");

  for (const [key, metric] of Object.entries(r.metrics)) {
    const label = METRIC_LABELS[key] ?? key;

    if (metric.percentilesTimeseries) {
      const values = metric.percentilesTimeseries.p75s;
      // `?? null` folds the undefined of an empty series into the "missing" case.
      const first = values[0] ?? null;
      const last = values[values.length - 1] ?? null;
      const unit = key === "cumulative_layout_shift" ? "" : "ms";

      if (first === null && last === null) {
        lines.push(`${label}: insufficient data`);
        continue;
      }

      lines.push(
        `${label}: ${first ?? "N/A"}${first !== null ? unit : ""} → ${last ?? "N/A"}${last !== null ? unit : ""} (${values.length} points)`,
      );

      // Show trend direction
      if (first !== null && last !== null) {
        const f = Number(first);
        const l = Number(last);
        if (!Number.isNaN(f) && !Number.isNaN(l)) {
          lines.push(describeP75Trend(f, l));
        }
      }
    } else if (metric.fractionTimeseries) {
      lines.push(`${label}: (fraction timeseries, ${periods.length} points)`);
      for (const [fKey, fData] of Object.entries(metric.fractionTimeseries)) {
        const fracs = fData.fractions;
        const first = fracs[0];
        const last = fracs[fracs.length - 1];
        // `!= null` also skips the undefined produced by an empty series.
        if (first != null && last != null && !Number.isNaN(first) && !Number.isNaN(last)) {
          lines.push(` ${fKey}: ${(first * 100).toFixed(1)}% → ${(last * 100).toFixed(1)}%`);
        }
      }
    }
  }

  // Show last 5 data points as table for core metrics
  const coreMetrics = ["largest_contentful_paint", "interaction_to_next_paint", "cumulative_layout_shift"];
  const available = coreMetrics.filter((m) => r.metrics[m]?.percentilesTimeseries);

  if (available.length > 0 && periods.length >= 5) {
    lines.push("", "--- Recent Data Points ---", "");
    const lastN = 5;
    const startIdx = periods.length - lastN;

    lines.push(`${"Date".padEnd(12)} ${available.map((m) => (METRIC_LABELS[m] ?? m).padEnd(10)).join(" ")}`);
    for (let i = startIdx; i < periods.length; i++) {
      const date = formatDate(periods[i].lastDate);
      const vals = available.map((m) => {
        const v = r.metrics[m]?.percentilesTimeseries?.p75s[i];
        return String(v ?? "N/A").padEnd(10);
      });
      lines.push(`${date.padEnd(12)} ${vals.join(" ")}`);
    }
  }

  return lines.join("\n");
}
150
+
151
/**
 * Registers the `crux` and `crux_history` tools on the MCP server.
 *
 * Both tools take a `url` or an `origin` plus optional form-factor and
 * metric filters; `crux_history` additionally takes the number of
 * collection periods. Failures are never thrown to the client: they are
 * returned as text, with dedicated messages for "no data" (404) and for a
 * disabled or key-blocked Chrome UX Report API.
 *
 * @param server MCP server to register the tools on.
 */
export function registerCruxTool(server: McpServer): void {
  // Parameters shared by both tools, declared once so they cannot drift apart.
  const targetParams = {
    url: z.string().optional().describe("Specific page URL. Provide either url or origin."),
    origin: z.string().optional().describe("Origin URL (e.g., 'https://example.com'). Provide either url or origin."),
    form_factor: z
      .enum(["DESKTOP", "PHONE", "TABLET"])
      .optional()
      .describe("Filter by device type. Omit for all devices."),
    metrics: z
      .array(
        z.enum([
          "cumulative_layout_shift",
          "first_contentful_paint",
          "interaction_to_next_paint",
          "largest_contentful_paint",
          "experimental_time_to_first_byte",
          "round_trip_time",
          "navigation_types",
          "form_factors",
        ]),
      )
      .optional()
      .describe("Specific metrics to return. Default: all available."),
  };

  const apiDisabledText =
    "Chrome UX Report API is not enabled or the API key doesn't have access. Enable the API at: https://console.cloud.google.com/apis/library/chromeuxreport.googleapis.com — and ensure your API key allows it (Credentials > API key > API restrictions).";

  // Maps a failure to user-facing text. Both tools recognise the same
  // conditions; previously crux_history ignored API_KEY_SERVICE_BLOCKED.
  const explainError = (err: unknown, noDataText: string, fallbackPrefix: string): string => {
    const msg = err instanceof Error ? err.message : String(err);
    if (msg.includes("404")) return noDataText;
    if (msg.includes("SERVICE_DISABLED") || msg.includes("API_KEY_SERVICE_BLOCKED")) return apiDisabledText;
    return `${fallbackPrefix}: ${msg}`;
  };

  server.tool(
    "crux",
    "Query Chrome UX Report (CrUX) for real-world Core Web Vitals data. Returns p75 metrics from actual Chrome users over a 28-day rolling window.",
    targetParams,
    async ({ url, origin, form_factor, metrics }) => {
      if (!url && !origin) {
        return { content: [{ type: "text", text: "Error: provide either url or origin." }] };
      }
      try {
        const result = await queryCrux({
          url,
          origin,
          formFactor: form_factor as CruxFormFactor | undefined,
          metrics,
        });
        return { content: [{ type: "text", text: formatCrux(url ?? origin ?? "", result) }] };
      } catch (err) {
        const text = explainError(
          err,
          `No CrUX data available for ${url ?? origin}. The page may not have enough Chrome user traffic.`,
          "Error querying CrUX",
        );
        return { content: [{ type: "text", text }] };
      }
    },
  );

  server.tool(
    "crux_history",
    "Query CrUX History API for Core Web Vitals trends over time. Returns up to 40 weekly data points (~10 months) of real-world performance data.",
    {
      ...targetParams,
      periods: z
        .number()
        // A period count is a whole number; reject fractions here instead of at the API.
        .int()
        .min(1)
        .max(40)
        .optional()
        .describe("Number of collection periods (1-40). Default: 25. Each is a 28-day window."),
    },
    async ({ url, origin, form_factor, metrics, periods }) => {
      if (!url && !origin) {
        return { content: [{ type: "text", text: "Error: provide either url or origin." }] };
      }
      try {
        const result = await queryCruxHistory({
          url,
          origin,
          formFactor: form_factor as CruxFormFactor | undefined,
          metrics,
          collectionPeriodCount: periods,
        });
        return { content: [{ type: "text", text: formatCruxHistory(url ?? origin ?? "", result) }] };
      } catch (err) {
        const text = explainError(
          err,
          `No CrUX history data for ${url ?? origin}. The page may not have enough Chrome user traffic.`,
          "Error querying CrUX History",
        );
        return { content: [{ type: "text", text }] };
      }
    },
  );
}
@@ -0,0 +1,105 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { z } from "zod";
3
+ import { type InspectionResult, inspectUrl } from "../lib/gsc.js";
4
+
5
/**
 * Renders a Search Console URL-inspection result as a plain-text report.
 *
 * Sections, in order: index status (always), canonical-mismatch warning,
 * sitemaps and referring URLs (when present), rich results, mobile
 * usability, and the link to the full report.
 *
 * @param url Inspected URL, shown in the header.
 * @param siteUrl Search Console property, shown in the header.
 * @param r Inspection result from the API.
 * @returns Report lines joined with newlines.
 */
function formatInspection(url: string, siteUrl: string, r: InspectionResult): string {
  const index = r.indexStatusResult;
  const out: string[] = [];

  out.push(`=== URL Inspection: ${url} ===`, `Property: ${siteUrl}`, "");
  out.push("--- Index Status ---", "");
  out.push(
    `Verdict: ${index.verdict}`,
    `Coverage: ${index.coverageState}`,
    `Page fetch: ${index.pageFetchState}`,
    `Robots.txt: ${index.robotsTxtState}`,
    `Indexing: ${index.indexingState}`,
    `Crawled as: ${index.crawledAs ?? "unknown"}`,
  );

  if (index.lastCrawlTime) out.push(`Last crawled: ${index.lastCrawlTime}`);
  if (index.userCanonical) out.push(`Your canonical: ${index.userCanonical}`);
  if (index.googleCanonical) out.push(`Google's canonical: ${index.googleCanonical}`);

  // Warn only when both canonicals are known and they disagree.
  const canonicalsDiffer =
    index.userCanonical && index.googleCanonical && index.userCanonical !== index.googleCanonical;
  if (canonicalsDiffer) {
    out.push(
      `\n⚠ CANONICAL MISMATCH: You declared "${index.userCanonical}" but Google chose "${index.googleCanonical}"`,
    );
  }

  if (index.sitemap && index.sitemap.length > 0) {
    out.push(`\nSitemaps: ${index.sitemap.join(", ")}`);
  }
  if (index.referringUrls && index.referringUrls.length > 0) {
    out.push(`\nReferring URLs: ${index.referringUrls.join(", ")}`);
  }

  // Rich Results
  const rich = r.richResultsResult;
  if (rich) {
    out.push("", "--- Rich Results ---", "");
    out.push(`Verdict: ${rich.verdict}`);

    for (const detected of rich.detectedItems ?? []) {
      out.push(`\nType: ${detected.richResultType}`);
      for (const instance of detected.items ?? []) {
        if (instance.name) out.push(` Name: ${instance.name}`);
        const issues = instance.issues ?? [];
        if (issues.length === 0) {
          out.push(" Status: PASS");
          continue;
        }
        for (const issue of issues) {
          out.push(` ${issue.severity}: ${issue.issueMessage}`);
        }
      }
    }
  }

  // Mobile Usability (deprecated but still returned)
  const mobile = r.mobileUsabilityResult;
  if (mobile) {
    out.push("", "--- Mobile Usability (deprecated) ---", "");
    out.push(`Verdict: ${mobile.verdict}`);
    for (const issue of mobile.issues ?? []) {
      const detail = issue.message ? `: ${issue.message}` : "";
      out.push(` ${issue.issueType}${detail}`);
    }
  }

  // Inspection link
  if (r.inspectionResultLink) {
    out.push("", `Full report: ${r.inspectionResultLink}`);
  }

  return out.join("\n");
}
76
+
77
/**
 * Registers the `inspect` tool on the MCP server.
 *
 * The tool inspects a URL within a Search Console property and returns the
 * formatted report. Failures are returned as text rather than thrown, with
 * a dedicated message when the error mentions PERMISSION_DENIED.
 *
 * @param server MCP server to register the tool on.
 */
export function registerInspectTool(server: McpServer): void {
  const inputShape = {
    url: z.string().url().describe("The URL to inspect"),
    site_url: z.string().describe("The GSC property (e.g., 'https://example.com/' or 'sc-domain:example.com')"),
  };

  server.tool(
    "inspect",
    "Inspect a URL using Google Search Console. Returns index status, canonical, crawl status, rich results validation, and more — directly from Google's index.",
    inputShape,
    async ({ url, site_url }) => {
      let text: string;
      try {
        // Formatting stays inside the try so its failures are reported the same way.
        text = formatInspection(url, site_url, await inspectUrl(url, site_url));
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        text = msg.includes("PERMISSION_DENIED")
          ? `Error: URL "${url}" is not part of property "${site_url}", or you don't have access.`
          : `Error inspecting URL: ${msg}`;
      }
      return { content: [{ type: "text", text }] };
    },
  );
}