pagesight 0.2.1 → 0.2.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pagesight",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "See your site the way search engines and AI see it.",
5
5
  "keywords": [
6
6
  "seo",
package/src/lib/auth.ts CHANGED
@@ -17,8 +17,8 @@ async function getServiceAccountToken(keyPath: string): Promise<string> {
17
17
  const keyFile = JSON.parse(await Bun.file(keyPath).text());
18
18
  const now = Math.floor(Date.now() / 1000);
19
19
 
20
- const header = btoa(JSON.stringify({ alg: "RS256", typ: "JWT" }));
21
- const payload = btoa(
20
+ const header = toBase64Url(JSON.stringify({ alg: "RS256", typ: "JWT" }));
21
+ const payload = toBase64Url(
22
22
  JSON.stringify({
23
23
  iss: keyFile.client_email,
24
24
  scope: SCOPES.join(" "),
@@ -65,11 +65,15 @@ function pemToBuffer(pem: string): ArrayBuffer {
65
65
  return buf.buffer;
66
66
  }
67
67
 
68
+ function toBase64Url(input: string): string {
69
+ return btoa(input).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
70
+ }
71
+
68
72
  function bufferToBase64Url(buf: ArrayBuffer): string {
69
73
  const bytes = new Uint8Array(buf);
70
74
  let binary = "";
71
75
  for (const b of bytes) binary += String.fromCharCode(b);
72
- return btoa(binary).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
76
+ return toBase64Url(binary);
73
77
  }
74
78
 
75
79
  // --- OAuth Refresh Token Auth ---
@@ -140,7 +144,7 @@ export function getAuthMethod(): string {
140
144
  export function getOAuthSetupUrl(clientId: string): string {
141
145
  const params = new URLSearchParams({
142
146
  client_id: clientId,
143
- redirect_uri: "urn:ietf:wg:oauth:2.0:oob",
147
+ redirect_uri: "http://localhost",
144
148
  response_type: "code",
145
149
  scope: SCOPES.join(" "),
146
150
  access_type: "offline",
@@ -162,7 +166,7 @@ export async function exchangeCodeForToken(
162
166
  client_id: clientId,
163
167
  client_secret: clientSecret,
164
168
  code,
165
- redirect_uri: "urn:ietf:wg:oauth:2.0:oob",
169
+ redirect_uri: "http://localhost",
166
170
  }).toString(),
167
171
  });
168
172
 
package/src/lib/robots.ts CHANGED
@@ -144,10 +144,8 @@ export function parseRobotsTxt(raw: string): RobotsTxt {
144
144
  } else if (directive === "sitemap") {
145
145
  if (value) sitemaps.push(value);
146
146
  else errors.push(`Line ${lineNum}: Empty sitemap URL`);
147
- } else if (directive === "crawl-delay" || directive === "host") {
148
- // Known non-standard directives — ignore silently
149
147
  } else {
150
- errors.push(`Line ${lineNum}: Unknown directive "${directive}"`);
148
+ // RFC 9309 allows parser-specific extensions — ignore unknown directives silently
151
149
  }
152
150
  }
153
151
 
@@ -190,36 +188,41 @@ export function isAllowed(
190
188
  } {
191
189
  const ua = userAgent.toLowerCase();
192
190
 
193
- let matchingGroup: RobotsGroup | null = null;
191
+ // RFC 9309: collect ALL rules from groups matching this user-agent
192
+ // Try specific user-agent match first, merging all matching groups
193
+ const matchedRules: Array<{ type: "allow" | "disallow"; path: string }> = [];
194
194
  let matchedGroupName: string | null = null;
195
+ let foundSpecific = false;
195
196
 
196
197
  for (const group of robots.groups) {
197
198
  for (const agent of group.userAgents) {
198
199
  if (agent.toLowerCase() === ua) {
199
- matchingGroup = group;
200
+ matchedRules.push(...group.rules);
200
201
  matchedGroupName = agent;
201
- break;
202
+ foundSpecific = true;
202
203
  }
203
204
  }
204
- if (matchingGroup) break;
205
205
  }
206
206
 
207
- if (!matchingGroup) {
207
+ // Fall back to wildcard if no specific match
208
+ if (!foundSpecific) {
208
209
  for (const group of robots.groups) {
209
210
  if (group.userAgents.some((a) => a === "*")) {
210
- matchingGroup = group;
211
+ matchedRules.push(...group.rules);
211
212
  matchedGroupName = "*";
212
- break;
213
213
  }
214
214
  }
215
215
  }
216
216
 
217
- if (!matchingGroup) return { allowed: true, matchedRule: null, matchedGroup: null };
217
+ if (matchedRules.length === 0 && !matchedGroupName) {
218
+ return { allowed: true, matchedRule: null, matchedGroup: null };
219
+ }
218
220
 
221
+ // Find the most specific (longest path) matching rule
219
222
  let bestRule: { type: "allow" | "disallow"; path: string } | null = null;
220
223
  let bestLength = -1;
221
224
 
222
- for (const rule of matchingGroup.rules) {
225
+ for (const rule of matchedRules) {
223
226
  if (pathMatches(rule.path, path)) {
224
227
  const ruleLength = rule.path.length;
225
228
  if (ruleLength > bestLength || (ruleLength === bestLength && rule.type === "allow")) {
@@ -268,6 +271,18 @@ export async function fetchRobotsTxt(origin: string): Promise<{ robotsTxt: Robot
268
271
  redirect: "follow",
269
272
  });
270
273
 
274
+ // RFC 9309: 4xx (except 429) = no restrictions (allow all)
275
+ // 5xx and 429 = assume complete disallow
276
+ if (res.status >= 500 || res.status === 429) {
277
+ const disallowAll: RobotsTxt = {
278
+ groups: [{ userAgents: ["*"], rules: [{ type: "disallow", path: "/" }] }],
279
+ sitemaps: [],
280
+ raw: "",
281
+ errors: [`Server returned ${res.status} — treating as full disallow per RFC 9309`],
282
+ };
283
+ return { robotsTxt: disallowAll, statusCode: res.status };
284
+ }
285
+
271
286
  if (res.status >= 400) {
272
287
  return { robotsTxt: { groups: [], sitemaps: [], raw: "", errors: [] }, statusCode: res.status };
273
288
  }
@@ -50,6 +50,11 @@ function formatRobotsAudit(origin: string, robots: RobotsTxt, statusCode: number
50
50
  const blocked = crawlers.filter((c) => !c.allowed);
51
51
  const allowed = crawlers.filter((c) => c.allowed);
52
52
 
53
+ if (crawlers.length === 0) {
54
+ lines.push("", "--- AI Crawlers ---", "", "Could not load AI crawler registry. Audit skipped.");
55
+ return lines.join("\n");
56
+ }
57
+
53
58
  lines.push(
54
59
  "",
55
60
  `--- AI Crawlers: ${blocked.length} blocked, ${allowed.length} allowed (of ${crawlers.length} known) ---`,
@@ -58,7 +63,7 @@ function formatRobotsAudit(origin: string, robots: RobotsTxt, statusCode: number
58
63
  );
59
64
 
60
65
  if (blocked.length === 0) {
61
- lines.push("", "All 139 known AI crawlers are allowed. No bots are explicitly blocked.");
66
+ lines.push("", `All ${crawlers.length} known AI crawlers are allowed. No bots are explicitly blocked.`);
62
67
  } else if (blocked.length === crawlers.length) {
63
68
  lines.push("", "All known AI crawlers are blocked.");
64
69
  // Show how they're blocked
@@ -87,25 +87,30 @@ export function registerSetupTool(server: McpServer): void {
87
87
  if (!client_id || !client_secret || !code) {
88
88
  return { content: [{ type: "text", text: "Error: client_id, client_secret, and code are all required." }] };
89
89
  }
90
- const tokens = await exchangeCodeForToken(client_id, client_secret, code);
91
- return {
92
- content: [
93
- {
94
- type: "text",
95
- text: [
96
- "=== OAuth Setup Complete ===",
97
- "",
98
- "Add these environment variables to your MCP server config:",
99
- "",
100
- `GSC_CLIENT_ID=${client_id}`,
101
- `GSC_CLIENT_SECRET=${client_secret}`,
102
- `GSC_REFRESH_TOKEN=${tokens.refreshToken}`,
103
- "",
104
- "Then restart Pagesight.",
105
- ].join("\n"),
106
- },
107
- ],
108
- };
90
+ try {
91
+ const tokens = await exchangeCodeForToken(client_id, client_secret, code);
92
+ return {
93
+ content: [
94
+ {
95
+ type: "text",
96
+ text: [
97
+ "=== OAuth Setup Complete ===",
98
+ "",
99
+ "Add these environment variables to your MCP server config:",
100
+ "",
101
+ `GSC_CLIENT_ID=${client_id}`,
102
+ "GSC_CLIENT_SECRET=(use the client_secret you already have)",
103
+ `GSC_REFRESH_TOKEN=${tokens.refreshToken}`,
104
+ "",
105
+ "Then restart Pagesight.",
106
+ ].join("\n"),
107
+ },
108
+ ],
109
+ };
110
+ } catch (err) {
111
+ const msg = err instanceof Error ? err.message : String(err);
112
+ return { content: [{ type: "text", text: `Error exchanging code: ${msg}` }] };
113
+ }
109
114
  }
110
115
 
111
116
  return { content: [{ type: "text", text: "Unknown action." }] };