pagesight 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +3 -1
- package/src/lib/auth.ts +9 -5
- package/src/lib/robots.ts +27 -12
- package/src/tools/performance.ts +5 -4
- package/src/tools/robots.ts +6 -1
- package/src/tools/setup.ts +24 -19
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -9,9 +9,11 @@ import { registerRobotsTool } from "./tools/robots.js";
|
|
|
9
9
|
import { registerSetupTool } from "./tools/setup.js";
|
|
10
10
|
import { registerSitemapsTool } from "./tools/sitemaps.js";
|
|
11
11
|
|
|
12
|
+
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
13
|
+
|
|
12
14
|
const server = new McpServer({
|
|
13
15
|
name: "pagesight",
|
|
14
|
-
version:
|
|
16
|
+
version: pkg.version,
|
|
15
17
|
});
|
|
16
18
|
|
|
17
19
|
registerCruxTool(server);
|
package/src/lib/auth.ts
CHANGED
|
@@ -17,8 +17,8 @@ async function getServiceAccountToken(keyPath: string): Promise<string> {
|
|
|
17
17
|
const keyFile = JSON.parse(await Bun.file(keyPath).text());
|
|
18
18
|
const now = Math.floor(Date.now() / 1000);
|
|
19
19
|
|
|
20
|
-
const header =
|
|
21
|
-
const payload =
|
|
20
|
+
const header = toBase64Url(JSON.stringify({ alg: "RS256", typ: "JWT" }));
|
|
21
|
+
const payload = toBase64Url(
|
|
22
22
|
JSON.stringify({
|
|
23
23
|
iss: keyFile.client_email,
|
|
24
24
|
scope: SCOPES.join(" "),
|
|
@@ -65,11 +65,15 @@ function pemToBuffer(pem: string): ArrayBuffer {
|
|
|
65
65
|
return buf.buffer;
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
function toBase64Url(input: string): string {
|
|
69
|
+
return btoa(input).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
|
|
70
|
+
}
|
|
71
|
+
|
|
68
72
|
function bufferToBase64Url(buf: ArrayBuffer): string {
|
|
69
73
|
const bytes = new Uint8Array(buf);
|
|
70
74
|
let binary = "";
|
|
71
75
|
for (const b of bytes) binary += String.fromCharCode(b);
|
|
72
|
-
return
|
|
76
|
+
return toBase64Url(binary);
|
|
73
77
|
}
|
|
74
78
|
|
|
75
79
|
// --- OAuth Refresh Token Auth ---
|
|
@@ -140,7 +144,7 @@ export function getAuthMethod(): string {
|
|
|
140
144
|
export function getOAuthSetupUrl(clientId: string): string {
|
|
141
145
|
const params = new URLSearchParams({
|
|
142
146
|
client_id: clientId,
|
|
143
|
-
redirect_uri: "
|
|
147
|
+
redirect_uri: "http://localhost",
|
|
144
148
|
response_type: "code",
|
|
145
149
|
scope: SCOPES.join(" "),
|
|
146
150
|
access_type: "offline",
|
|
@@ -162,7 +166,7 @@ export async function exchangeCodeForToken(
|
|
|
162
166
|
client_id: clientId,
|
|
163
167
|
client_secret: clientSecret,
|
|
164
168
|
code,
|
|
165
|
-
redirect_uri: "
|
|
169
|
+
redirect_uri: "http://localhost",
|
|
166
170
|
}).toString(),
|
|
167
171
|
});
|
|
168
172
|
|
package/src/lib/robots.ts
CHANGED
|
@@ -144,10 +144,8 @@ export function parseRobotsTxt(raw: string): RobotsTxt {
|
|
|
144
144
|
} else if (directive === "sitemap") {
|
|
145
145
|
if (value) sitemaps.push(value);
|
|
146
146
|
else errors.push(`Line ${lineNum}: Empty sitemap URL`);
|
|
147
|
-
} else if (directive === "crawl-delay" || directive === "host") {
|
|
148
|
-
// Known non-standard directives — ignore silently
|
|
149
147
|
} else {
|
|
150
|
-
|
|
148
|
+
// RFC 9309 allows parser-specific extensions — ignore unknown directives silently
|
|
151
149
|
}
|
|
152
150
|
}
|
|
153
151
|
|
|
@@ -190,36 +188,41 @@ export function isAllowed(
|
|
|
190
188
|
} {
|
|
191
189
|
const ua = userAgent.toLowerCase();
|
|
192
190
|
|
|
193
|
-
|
|
191
|
+
// RFC 9309: collect ALL rules from groups matching this user-agent
|
|
192
|
+
// Try specific user-agent match first, merging all matching groups
|
|
193
|
+
const matchedRules: Array<{ type: "allow" | "disallow"; path: string }> = [];
|
|
194
194
|
let matchedGroupName: string | null = null;
|
|
195
|
+
let foundSpecific = false;
|
|
195
196
|
|
|
196
197
|
for (const group of robots.groups) {
|
|
197
198
|
for (const agent of group.userAgents) {
|
|
198
199
|
if (agent.toLowerCase() === ua) {
|
|
199
|
-
|
|
200
|
+
matchedRules.push(...group.rules);
|
|
200
201
|
matchedGroupName = agent;
|
|
201
|
-
|
|
202
|
+
foundSpecific = true;
|
|
202
203
|
}
|
|
203
204
|
}
|
|
204
|
-
if (matchingGroup) break;
|
|
205
205
|
}
|
|
206
206
|
|
|
207
|
-
if
|
|
207
|
+
// Fall back to wildcard if no specific match
|
|
208
|
+
if (!foundSpecific) {
|
|
208
209
|
for (const group of robots.groups) {
|
|
209
210
|
if (group.userAgents.some((a) => a === "*")) {
|
|
210
|
-
|
|
211
|
+
matchedRules.push(...group.rules);
|
|
211
212
|
matchedGroupName = "*";
|
|
212
|
-
break;
|
|
213
213
|
}
|
|
214
214
|
}
|
|
215
215
|
}
|
|
216
216
|
|
|
217
|
-
if (
|
|
217
|
+
if (matchedRules.length === 0 && !matchedGroupName) {
|
|
218
|
+
return { allowed: true, matchedRule: null, matchedGroup: null };
|
|
219
|
+
}
|
|
218
220
|
|
|
221
|
+
// Find the most specific (longest path) matching rule
|
|
219
222
|
let bestRule: { type: "allow" | "disallow"; path: string } | null = null;
|
|
220
223
|
let bestLength = -1;
|
|
221
224
|
|
|
222
|
-
for (const rule of
|
|
225
|
+
for (const rule of matchedRules) {
|
|
223
226
|
if (pathMatches(rule.path, path)) {
|
|
224
227
|
const ruleLength = rule.path.length;
|
|
225
228
|
if (ruleLength > bestLength || (ruleLength === bestLength && rule.type === "allow")) {
|
|
@@ -268,6 +271,18 @@ export async function fetchRobotsTxt(origin: string): Promise<{ robotsTxt: Robot
|
|
|
268
271
|
redirect: "follow",
|
|
269
272
|
});
|
|
270
273
|
|
|
274
|
+
// RFC 9309: 4xx (except 429) = no restrictions (allow all)
|
|
275
|
+
// 5xx and 429 = assume complete disallow
|
|
276
|
+
if (res.status >= 500 || res.status === 429) {
|
|
277
|
+
const disallowAll: RobotsTxt = {
|
|
278
|
+
groups: [{ userAgents: ["*"], rules: [{ type: "disallow", path: "/" }] }],
|
|
279
|
+
sitemaps: [],
|
|
280
|
+
raw: "",
|
|
281
|
+
errors: [`Server returned ${res.status} — treating as full disallow per RFC 9309`],
|
|
282
|
+
};
|
|
283
|
+
return { robotsTxt: disallowAll, statusCode: res.status };
|
|
284
|
+
}
|
|
285
|
+
|
|
271
286
|
if (res.status >= 400) {
|
|
272
287
|
return { robotsTxt: { groups: [], sitemaps: [], raw: "", errors: [] }, statusCode: res.status };
|
|
273
288
|
}
|
package/src/tools/performance.ts
CHANGED
|
@@ -39,7 +39,7 @@ function formatPerformance(
|
|
|
39
39
|
totalImpressions > 0 ? rows.reduce((sum, r) => sum + r.position * r.impressions, 0) / totalImpressions : 0;
|
|
40
40
|
|
|
41
41
|
lines.push(
|
|
42
|
-
|
|
42
|
+
`--- Summary (${rows.length} rows returned) ---`,
|
|
43
43
|
"",
|
|
44
44
|
`Clicks: ${totalClicks.toLocaleString()}`,
|
|
45
45
|
`Impressions: ${totalImpressions.toLocaleString()}`,
|
|
@@ -67,9 +67,10 @@ function formatPerformance(
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
function daysAgo(n: number): string {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
// GSC dates are in PT (Pacific Time). Use UTC-8 as a stable approximation.
|
|
71
|
+
const now = new Date(Date.now() - 8 * 60 * 60 * 1000);
|
|
72
|
+
now.setDate(now.getDate() - n);
|
|
73
|
+
return now.toISOString().split("T")[0];
|
|
73
74
|
}
|
|
74
75
|
|
|
75
76
|
export function registerPerformanceTool(server: McpServer): void {
|
package/src/tools/robots.ts
CHANGED
|
@@ -50,6 +50,11 @@ function formatRobotsAudit(origin: string, robots: RobotsTxt, statusCode: number
|
|
|
50
50
|
const blocked = crawlers.filter((c) => !c.allowed);
|
|
51
51
|
const allowed = crawlers.filter((c) => c.allowed);
|
|
52
52
|
|
|
53
|
+
if (crawlers.length === 0) {
|
|
54
|
+
lines.push("", "--- AI Crawlers ---", "", "Could not load AI crawler registry. Audit skipped.");
|
|
55
|
+
return lines.join("\n");
|
|
56
|
+
}
|
|
57
|
+
|
|
53
58
|
lines.push(
|
|
54
59
|
"",
|
|
55
60
|
`--- AI Crawlers: ${blocked.length} blocked, ${allowed.length} allowed (of ${crawlers.length} known) ---`,
|
|
@@ -58,7 +63,7 @@ function formatRobotsAudit(origin: string, robots: RobotsTxt, statusCode: number
|
|
|
58
63
|
);
|
|
59
64
|
|
|
60
65
|
if (blocked.length === 0) {
|
|
61
|
-
lines.push("",
|
|
66
|
+
lines.push("", `All ${crawlers.length} known AI crawlers are allowed. No bots are explicitly blocked.`);
|
|
62
67
|
} else if (blocked.length === crawlers.length) {
|
|
63
68
|
lines.push("", "All known AI crawlers are blocked.");
|
|
64
69
|
// Show how they're blocked
|
package/src/tools/setup.ts
CHANGED
|
@@ -87,25 +87,30 @@ export function registerSetupTool(server: McpServer): void {
|
|
|
87
87
|
if (!client_id || !client_secret || !code) {
|
|
88
88
|
return { content: [{ type: "text", text: "Error: client_id, client_secret, and code are all required." }] };
|
|
89
89
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
90
|
+
try {
|
|
91
|
+
const tokens = await exchangeCodeForToken(client_id, client_secret, code);
|
|
92
|
+
return {
|
|
93
|
+
content: [
|
|
94
|
+
{
|
|
95
|
+
type: "text",
|
|
96
|
+
text: [
|
|
97
|
+
"=== OAuth Setup Complete ===",
|
|
98
|
+
"",
|
|
99
|
+
"Add these environment variables to your MCP server config:",
|
|
100
|
+
"",
|
|
101
|
+
`GSC_CLIENT_ID=${client_id}`,
|
|
102
|
+
"GSC_CLIENT_SECRET=(use the client_secret you already have)",
|
|
103
|
+
`GSC_REFRESH_TOKEN=${tokens.refreshToken}`,
|
|
104
|
+
"",
|
|
105
|
+
"Then restart Pagesight.",
|
|
106
|
+
].join("\n"),
|
|
107
|
+
},
|
|
108
|
+
],
|
|
109
|
+
};
|
|
110
|
+
} catch (err) {
|
|
111
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
112
|
+
return { content: [{ type: "text", text: `Error exchanging code: ${msg}` }] };
|
|
113
|
+
}
|
|
109
114
|
}
|
|
110
115
|
|
|
111
116
|
return { content: [{ type: "text", text: "Unknown action." }] };
|