agentpay-seo-audit-mcp 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -0
- package/server.py +782 -0
package/package.json
ADDED
package/server.py
ADDED
@@ -0,0 +1,782 @@
"""
SEO Audit MCP Server
Provides AI agents with the ability to audit any URL's on-page SEO.

Usage:
    python3 server.py                       # Free tier (50 calls/instance)
    python3 server.py --pro-key PROL_XXX    # Pro tier (unlimited)

Tools:
    - seo_analyze_url: Full on-page SEO audit
    - seo_check_headers: HTTP headers audit
    - seo_suggest_keywords: Extract keywords from page content
    - seo_analyze_speed_factors: Page weight and resource analysis

Pricing: $19/mo — https://buy.stripe.com/dRm6oJ4Hd2Jugek0wz1oI0m
"""

import json
import re
import ssl as ssl_module
import sys
from datetime import datetime
from typing import Any, Optional
from urllib.parse import urlparse

import httpx
from mcp.server.models import InitializationOptions
import mcp.types as types
from mcp.server import NotificationOptions, Server
from pydantic import AnyUrl

server = Server("seo-audit-mcp")

# ─── Rate Limiting & Pro Key ───────────────────────────────────────────────
FREE_LIMIT = 50
PRO_KEYS = {"PROL_AGENTPAY_DEMO": "demo"}  # Demo key for testing

# Parse --pro-key from command line
PRO_KEY = None
for i, arg in enumerate(sys.argv):
    if arg == "--pro-key" and i + 1 < len(sys.argv):
        PRO_KEY = sys.argv[i + 1]
        break

IS_PRO = PRO_KEY in PRO_KEYS
call_counter = 0

STRIPE_LINK = "https://buy.stripe.com/dRm6oJ4Hd2Jugek0wz1oI0m"  # $19/mo

def check_rate_limit():
    """Check if free tier has exceeded limit. Returns error dict or None."""
    global call_counter
    if IS_PRO:
        return None
    call_counter += 1
    if call_counter > FREE_LIMIT:
        remaining = call_counter - FREE_LIMIT
        return {
            "error": f"Free tier limit reached ({FREE_LIMIT} calls). Upgrade to Pro for unlimited access.",
            "isError": True,
            "next_steps": [
                f"Purchase Pro at {STRIPE_LINK} ($19/mo, unlimited)",
                "Restart the server to reset the free counter",
                "Use --pro-key PROL_XXX to run in Pro mode"
            ],
            "calls_used": call_counter,
            "limit": FREE_LIMIT,
            "over_by": remaining
        }
    return None

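# Example payload (illustrative): on the 51st call of a free-tier instance,
# check_rate_limit() returns a dict that the tool dispatcher below serializes
# to JSON for the agent instead of running the requested tool, roughly:
#
#   {
#     "error": "Free tier limit reached (50 calls). Upgrade to Pro for unlimited access.",
#     "isError": true,
#     "next_steps": ["Purchase Pro at https://buy.stripe.com/... ($19/mo, unlimited)",
#                    "Restart the server to reset the free counter",
#                    "Use --pro-key PROL_XXX to run in Pro mode"],
#     "calls_used": 51,
#     "limit": 50,
#     "over_by": 1
#   }
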
# ─── Helpers ───────────────────────────────────────────────────────────────

def fetch_url(url: str, follow_redirects: bool = True) -> httpx.Response:
    """Fetch a URL with sensible defaults. Raises on network errors."""
    with httpx.Client(
        follow_redirects=follow_redirects,
        timeout=30.0,
        headers={
            "User-Agent": (
                "Mozilla/5.0 (compatible; SEOAuditBot/1.0; "
                "+https://seo-audit-mcp.example.com)"
            ),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
        },
    ) as client:
        return client.get(url)


def extract_tag(html: str, tag: str) -> list[str]:
    """Extract full <tag ...>content</tag> from HTML using regex."""
    pattern = re.compile(
        rf'<{tag}[^>]*>(.*?)</{tag}>', re.IGNORECASE | re.DOTALL
    )
    return pattern.findall(html)


def extract_attr(html: str, tag: str, attr: str) -> list[str]:
    """Extract attribute values from specific tags."""
    pattern = re.compile(
        rf'<{tag}[^>]*\s{attr}\s*=\s*["\']([^"\']*)["\']',
        re.IGNORECASE | re.DOTALL,
    )
    return pattern.findall(html)


def extract_meta_content(html: str, name_or_prop: str) -> Optional[str]:
    """Extract content from a <meta> tag by name or property."""
    # Try property first (Open Graph)
    pattern = re.compile(
        rf'<meta[^>]*(?:property|name)\s*=\s*["\']{re.escape(name_or_prop)}["\'][^>]*'
        rf'\s+content\s*=\s*["\']([^"\']*)["\']',
        re.IGNORECASE,
    )
    m = pattern.search(html)
    if m:
        return m.group(1)
    # Try reversed attribute order
    pattern2 = re.compile(
        rf'<meta[^>]*\s+content\s*=\s*["\']([^"\']*)["\'][^>]*'
        rf'(?:property|name)\s*=\s*["\']{re.escape(name_or_prop)}["\']',
        re.IGNORECASE,
    )
    m2 = pattern2.search(html)
    if m2:
        return m2.group(1)
    return None


def extract_all_meta(html: str) -> dict[str, str]:
    """Extract all meta tags into a dict keyed by name/property."""
    metas: dict[str, str] = {}
    pattern = re.compile(
        r'<meta[^>]*(?:name|property)\s*=\s*["\']([^"\']*)["\']'
        r'[^>]*\s+content\s*=\s*["\']([^"\']*)["\']',
        re.IGNORECASE,
    )
    for m in pattern.finditer(html):
        metas[m.group(1).strip()] = m.group(2).strip()
    # reversed order
    pattern2 = re.compile(
        r'<meta[^>]*\s+content\s*=\s*["\']([^"\']*)["\']'
        r'[^>]*(?:name|property)\s*=\s*["\']([^"\']*)["\']',
        re.IGNORECASE,
    )
    for m in pattern2.finditer(html):
        metas[m.group(2).strip()] = m.group(1).strip()
    return metas


def strip_html(html: str) -> str:
    """Remove all HTML tags, returning only visible text."""
    text = re.sub(r'<[^>]+>', ' ', html)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

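# Illustrative examples (not exercised by the server itself) of what the regex
# helpers above return on a small, hypothetical fragment:
#
#   html = '<title>Hi</title><meta name="description" content="A page.">'
#   extract_tag(html, "title")                 -> ["Hi"]
#   extract_meta_content(html, "description")  -> "A page."
#   strip_html("<p>Hello <b>world</b></p>")    -> "Hello world"
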
def check_ssl(url: str) -> dict[str, Any]:
    """Check SSL certificate validity for a URL."""
    import socket

    parsed = urlparse(url)
    hostname = parsed.hostname or ""
    port = parsed.port or 443
    result: dict[str, Any] = {"valid": False, "issuer": None, "error": None}
    try:
        # Open a TCP connection and complete the TLS handshake; certificate
        # validation happens inside wrap_socket() with the default context.
        ctx = ssl_module.create_default_context()
        with socket.create_connection((hostname, port), timeout=10) as raw_sock:
            with ctx.wrap_socket(raw_sock, server_hostname=hostname) as tls_sock:
                cert = tls_sock.getpeercert() or {}
                issuer = dict(item[0] for item in cert.get("issuer", ()))
                result["valid"] = True
                result["issuer"] = issuer.get("organizationName") or issuer.get("commonName")
    except Exception:
        # Fall back to a simpler check via httpx
        try:
            with httpx.Client(verify=True, timeout=10.0) as client:
                client.get(f"https://{hostname}:{port}", headers={"User-Agent": "SEOAuditBot/1.0"})
            result["valid"] = True
            result["issuer"] = "verified (via httpx)"
        except Exception as e2:
            result["valid"] = False
            result["error"] = str(e2)
    return result

def status_for_length(length: int, min_val: int, max_val: int) -> str:
    """Return 'good', 'fair', or 'poor' based on length bounds."""
    if min_val <= length <= max_val:
        return "good"
    if length == 0:
        return "poor"
    return "fair"

def compute_score(issues: list[str]) -> int:
    """Compute an SEO score out of 100 based on the issues list."""
    penalties = {
        "Missing title tag": 20,
        "Title tag too short": 10,
        "Title tag too long": 10,
        "Missing meta description": 20,
        "Meta description too short": 5,
        "Meta description too long": 5,
        "No H1 heading": 15,
        "Multiple H1 headings": 10,
        "H1 too long": 5,
        "Missing alt text on images": 5,
        "No Open Graph title": 5,
        "No Open Graph description": 3,
        "No Open Graph image": 3,
        "No viewport meta tag": 10,
        "No canonical URL": 5,
        "No structured data": 5,
        "SSL certificate issue": 15,
    }
    score = 100
    for issue in issues:
        # Some issue strings carry extra detail (e.g. "Missing alt text on
        # images (3 of 12)"), so match penalty keys by prefix; anything
        # unrecognized costs the default 2 points.
        penalty = 2
        for key, value in penalties.items():
            if issue.startswith(key):
                penalty = value
                break
        score -= penalty
    return max(0, min(100, score))

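# Worked example (illustrative): an audit that reports only "Missing title tag"
# and "No H1 heading" starts from 100 and loses 20 + 15 points, so
# compute_score(["Missing title tag", "No H1 heading"]) returns 65.
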
# ─── Parse helpers ─────────────────────────────────────────────────────────

def parse_headings(html: str) -> dict[str, Any]:
    """Analyze heading structure (H1-H6)."""
    result: dict[str, Any] = {
        "h1_count": 0, "h2_count": 0, "h3_count": 0,
        "h4_count": 0, "h5_count": 0, "h6_count": 0,
        "h1_contents": [], "issues": [],
    }
    for level in range(1, 7):
        tag = f"h{level}"
        contents = extract_tag(html, tag)
        result[f"h{level}_count"] = len(contents)
        if level == 1:
            result["h1_contents"] = [strip_html(c)[:100] for c in contents]

    # H1 issues
    if result["h1_count"] == 0:
        result["issues"].append("No H1 heading")
    elif result["h1_count"] > 1:
        result["issues"].append("Multiple H1 headings")
    if result["h1_contents"]:
        for h1 in result["h1_contents"]:
            if len(h1) > 70:
                result["issues"].append("H1 too long")
                break
    return result


def parse_images(html: str) -> dict[str, Any]:
    """Analyze image tags for alt text coverage."""
    img_tags = re.findall(r'<img[^>]*>', html, re.IGNORECASE)
    total = len(img_tags)
    missing_alt = 0
    for img in img_tags:
        if not re.search(r'\salt\s*=\s*["\']', img, re.IGNORECASE):
            missing_alt += 1
    return {"total": total, "missing_alt": missing_alt}


def parse_open_graph(html: str) -> dict[str, bool]:
    """Check for Open Graph meta tags."""
    og_title = extract_meta_content(html, "og:title") is not None
    og_description = extract_meta_content(html, "og:description") is not None
    og_image = extract_meta_content(html, "og:image") is not None
    og_url = extract_meta_content(html, "og:url") is not None
    og_type = extract_meta_content(html, "og:type") is not None
    return {
        "og_title": og_title,
        "og_description": og_description,
        "og_image": og_image,
        "og_url": og_url,
        "og_type": og_type,
    }


def parse_title(html: str) -> dict[str, Any]:
    """Extract and analyze the <title> tag."""
    titles = extract_tag(html, "title")
    if not titles:
        return {"content": "", "length": 0, "status": "poor"}
    content = strip_html(titles[0])
    length = len(content)
    if length == 0:
        return {"content": "", "length": 0, "status": "poor"}
    status = status_for_length(length, 30, 60)
    return {"content": content, "length": length, "status": status}


def parse_meta_description(html: str) -> dict[str, Any]:
    """Extract and analyze the meta description."""
    content = extract_meta_content(html, "description")
    if not content:
        return {"content": "", "length": 0, "status": "poor"}
    length = len(content)
    status = status_for_length(length, 120, 158)
    return {"content": content, "length": length, "status": status}


def parse_structured_data(html: str) -> dict[str, Any]:
    """Detect structured data (JSON-LD, Microdata, RDFa)."""
    # JSON-LD
    jsonld = bool(re.search(
        r'<script[^>]*type\s*=\s*["\']application/ld\+json["\']',
        html, re.IGNORECASE,
    ))
    # Microdata (itemscope/itemprop)
    microdata = bool(re.search(r'\bitemscope\b', html, re.IGNORECASE))
    # RDFa
    rdfa = bool(re.search(r'\btypeof\s*=\s*["\']', html, re.IGNORECASE))
    return {
        "json_ld": jsonld,
        "microdata": microdata,
        "rdfa": rdfa,
        "present": jsonld or microdata or rdfa,
    }


def parse_hreflang(html: str) -> list[dict[str, str]]:
    """Extract hreflang tags."""
    tags = []
    pattern = re.compile(
        r'<link[^>]*\brel\s*=\s*["\']alternate["\'][^>]*\bhreflang\s*=\s*["\']([^"\']*)["\']',
        re.IGNORECASE,
    )
    for m in pattern.finditer(html):
        link_tag = m.group(0)
        href_m = re.search(r'\bhref\s*=\s*["\']([^"\']*)["\']', link_tag, re.IGNORECASE)
        href = href_m.group(1) if href_m else ""
        tags.append({"hreflang": m.group(1), "href": href})
    return tags

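# Illustrative example (not exercised by the server itself): on a fragment like
# '<h1>Welcome</h1><h2>Docs</h2><h2>Pricing</h2>', parse_headings() reports
# h1_count=1, h2_count=2, h1_contents=["Welcome"], and an empty issues list.
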
# ─── MCP Tools ─────────────────────────────────────────────────────────────

@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
    return [
        types.Tool(
            name="seo_analyze_url",
            description=(
                "Full on-page SEO audit of any URL. Analyzes title tag, meta "
                "description, headings (H1-H6), image alt texts, word count, "
                "SSL status, mobile-friendly viewport tag, Open Graph tags, "
                "canonical URL, hreflang tags, robots meta, and structured "
                "data presence. Returns a comprehensive JSON report with an "
                "SEO score out of 100."
            ),
            inputSchema={
                "type": "object",
                "required": ["url"],
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The URL to audit (e.g., https://example.com)",
                    }
                },
            },
        ),
        types.Tool(
            name="seo_check_headers",
            description=(
                "HTTP headers audit of a URL. Checks status code, content-type, "
                "x-robots-tag, link rel=canonical, cache-control, server, "
                "last-modified headers."
            ),
            inputSchema={
                "type": "object",
                "required": ["url"],
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The URL to check headers for",
                    }
                },
            },
        ),
        types.Tool(
            name="seo_suggest_keywords",
            description=(
                "Extract keyword suggestions from page content. Analyzes most "
                "frequent words, words in headings, and words in the title. "
                "Suggests primary and secondary keywords."
            ),
            inputSchema={
                "type": "object",
                "required": ["url"],
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The URL to extract keywords from",
                    },
                    "count": {
                        "type": "integer",
                        "description": "Number of keyword suggestions (default: 10)",
                        "default": 10,
                    },
                },
            },
        ),
        types.Tool(
            name="seo_analyze_speed_factors",
            description=(
                "Analyze page weight and resource loading. Measures total HTML "
                "size, number of resources (scripts, stylesheets, images), "
                "compression (gzip/brotli), and keep-alive connection status."
            ),
            inputSchema={
                "type": "object",
                "required": ["url"],
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The URL to analyze speed factors for",
                    }
                },
            },
        ),
    ]

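# Example (illustrative): over MCP, a client invokes one of the tools above with
# a JSON-RPC "tools/call" request shaped roughly like this:
#
#   {
#     "jsonrpc": "2.0",
#     "id": 1,
#     "method": "tools/call",
#     "params": {
#       "name": "seo_analyze_url",
#       "arguments": {"url": "https://example.com"}
#     }
#   }
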
@server.call_tool()
async def handle_call_tool(
    name: str, arguments: dict | None
) -> list[types.TextContent]:
    if arguments is None:
        arguments = {}

    # Rate limit check
    limit_check = check_rate_limit()
    if limit_check:
        return [types.TextContent(type="text", text=json.dumps(limit_check, indent=2))]

    url = arguments.get("url", "")

    if name == "seo_analyze_url":
        return [types.TextContent(type="text", text=json.dumps(
            await seo_analyze_url(url), indent=2
        ))]
    elif name == "seo_check_headers":
        return [types.TextContent(type="text", text=json.dumps(
            await seo_check_headers(url), indent=2
        ))]
    elif name == "seo_suggest_keywords":
        count = arguments.get("count", 10)
        return [types.TextContent(type="text", text=json.dumps(
            await seo_suggest_keywords(url, count), indent=2
        ))]
    elif name == "seo_analyze_speed_factors":
        return [types.TextContent(type="text", text=json.dumps(
            await seo_analyze_speed_factors(url), indent=2
        ))]
    else:
        raise ValueError(f"Unknown tool: {name}")

async def seo_analyze_url(url: str) -> dict[str, Any]:
    """Full on-page SEO audit."""
    try:
        response = fetch_url(url)
        html = response.text
    except Exception as e:
        return {"url": url, "error": f"Failed to fetch URL: {str(e)}", "score": 0}

    issues: list[str] = []

    # Title
    title_info = parse_title(html)
    if title_info["status"] == "poor" and title_info["length"] == 0:
        issues.append("Missing title tag")
    elif title_info["status"] == "poor":
        issues.append("Title tag too short")
    elif title_info["status"] == "fair":
        issues.append("Title tag too long" if title_info["length"] > 60 else "Title tag too short")

    # Meta description
    meta_desc = parse_meta_description(html)
    if meta_desc["status"] == "poor" and meta_desc["length"] == 0:
        issues.append("Missing meta description")
    elif meta_desc["status"] == "poor":
        issues.append("Meta description too short")
    elif meta_desc["status"] == "fair":
        issues.append("Meta description too long" if meta_desc["length"] > 158 else "Meta description too short")

    # Headings
    headings_info = parse_headings(html)
    issues.extend(headings_info.get("issues", []))

    # Images
    images_info = parse_images(html)
    if images_info["missing_alt"] > 0:
        issues.append(f"Missing alt text on images ({images_info['missing_alt']} of {images_info['total']})")

    # SSL
    ssl_info = check_ssl(url)
    if not ssl_info["valid"]:
        issues.append("SSL certificate issue")

    # Viewport (mobile-friendly)
    viewport = extract_meta_content(html, "viewport")
    if not viewport:
        issues.append("No viewport meta tag")

    # Open Graph
    og_info = parse_open_graph(html)
    if not og_info["og_title"]:
        issues.append("No Open Graph title")
    if not og_info["og_description"]:
        issues.append("No Open Graph description")
    if not og_info["og_image"]:
        issues.append("No Open Graph image")

    # Canonical URL (first <link rel="canonical"> that carries an href)
    canonical_href = None
    link_pattern = re.compile(
        r'<link[^>]*\brel\s*=\s*["\']canonical["\'][^>]*>',
        re.IGNORECASE,
    )
    for link_tag in link_pattern.finditer(html):
        href_m = re.search(r'\bhref\s*=\s*["\']([^"\']*)["\']', link_tag.group(0), re.IGNORECASE)
        if href_m:
            canonical_href = href_m.group(1)
            break
    if not canonical_href:
        issues.append("No canonical URL")

    # Hreflang
    hreflang_tags = parse_hreflang(html)

    # Robots meta
    robots = extract_meta_content(html, "robots")

    # Structured data
    sd_info = parse_structured_data(html)
    if not sd_info["present"]:
        issues.append("No structured data")

    # Word count
    body_text = ""
    body_m = re.search(r'<body[^>]*>(.*)</body>', html, re.IGNORECASE | re.DOTALL)
    if body_m:
        body_text = strip_html(body_m.group(1))
    else:
        body_text = strip_html(html)
    word_count = len(body_text.split())

    # Score
    score = compute_score(issues)

    return {
        "url": url,
        "title": title_info,
        "meta_description": meta_desc,
        "headings": headings_info,
        "images": images_info,
        "ssl": ssl_info,
        "viewport": viewport if viewport else None,
        "open_graph": og_info,
        "canonical_url": canonical_href,
        "hreflang_tags": hreflang_tags,
        "robots_meta": robots,
        "structured_data": sd_info,
        "word_count": word_count,
        "issues": issues,
        "score": score,
    }

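# Illustrative usage (outside the MCP transport): the audit coroutine can be run
# directly, e.g.
#
#   import asyncio
#   report = asyncio.run(seo_analyze_url("https://example.com"))
#   report["score"]   -> an integer from 0 to 100
#   report["issues"]  -> e.g. ["Missing meta description", "No structured data"]
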
async def seo_check_headers(url: str) -> dict[str, Any]:
    """HTTP headers audit."""
    try:
        # Don't follow redirects to capture the first response
        response = fetch_url(url, follow_redirects=False)
    except Exception as e:
        return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}

    headers = response.headers
    # Extract link header for canonical
    link_header = headers.get("link", "")
    canonical_from_link = None
    if link_header:
        cm = re.search(r'<([^>]+)>\s*;\s*rel\s*=\s*["\']?canonical["\']?', link_header, re.IGNORECASE)
        if cm:
            canonical_from_link = cm.group(1)

    return {
        "url": str(response.url),
        "status_code": response.status_code,
        "content_type": headers.get("content-type"),
        "x_robots_tag": headers.get("x-robots-tag"),
        "link_canonical": canonical_from_link,
        "cache_control": headers.get("cache-control"),
        "server": headers.get("server"),
        "last_modified": headers.get("last-modified"),
        "content_length": headers.get("content-length"),
        "all_headers": dict(headers),
    }

async def seo_suggest_keywords(url: str, count: int = 10) -> dict[str, Any]:
    """Extract keyword suggestions from page content."""
    try:
        response = fetch_url(url)
        html = response.text
    except Exception as e:
        return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}

    # Extract text
    body_m = re.search(r'<body[^>]*>(.*)</body>', html, re.IGNORECASE | re.DOTALL)
    body_html = body_m.group(1) if body_m else html
    body_text = strip_html(body_html)

    # Title text
    title_text = ""
    titles = extract_tag(html, "title")
    if titles:
        title_text = strip_html(titles[0])

    # Heading texts
    heading_text = ""
    for level in range(1, 7):
        for h in extract_tag(html, f"h{level}"):
            heading_text += " " + strip_html(h)

    # Tokenize (simple: lowercase, keep alphabetic words >= 3 chars)
    def tokenize(text: str) -> list[str]:
        words = re.findall(r'[a-zA-Z]{3,}', text.lower())
        # Filter common stop words
        stop_words = {
            "the", "and", "for", "are", "but", "not", "you", "all", "can",
            "had", "her", "was", "one", "our", "out", "has", "have", "been",
            "some", "same", "also", "just", "than", "that", "this", "with",
            "from", "they", "their", "them", "would", "could", "should",
            "about", "into", "over", "after", "what", "when", "where", "which",
            "will", "were", "being", "does", "more", "most", "other",
            "such", "here", "there", "each", "like", "very", "your",
        }
        return [w for w in words if w not in stop_words]

    body_words = tokenize(body_text)
    title_words = tokenize(title_text)
    heading_words = tokenize(heading_text)

    # Count frequencies
    from collections import Counter
    body_freq = Counter(body_words)
    heading_freq = Counter(heading_words)
    title_freq = Counter(title_words)

    # Suggest primary keywords (top from headings + title)
    primary_candidates: list[str] = []
    seen: set[str] = set()
    for word, _ in heading_freq.most_common(20):
        if word not in seen:
            primary_candidates.append(word)
            seen.add(word)
    for word, _ in title_freq.most_common(20):
        if word not in seen:
            primary_candidates.append(word)
            seen.add(word)

    # Secondary keywords (top from body minus already seen)
    secondary_candidates: list[str] = []
    for word, _ in body_freq.most_common(50):
        if word not in seen:
            secondary_candidates.append(word)
            seen.add(word)

    # Bigrams as additional suggestions (for secondary)
    bigrams = re.findall(r'(?=(\b[a-zA-Z]{3,}\s+[a-zA-Z]{3,}\b))', body_text.lower())
    bigram_freq = Counter(bigrams)
    bigram_suggestions = [bg for bg, _ in bigram_freq.most_common(count) if bg not in seen]

    return {
        "url": url,
        "word_count": len(body_words),
        "primary_keywords": primary_candidates[:max(5, count // 2)],
        "secondary_keywords": secondary_candidates[:count],
        "title_words": title_freq.most_common(10),
        "heading_words": heading_freq.most_common(15),
        "top_body_words": body_freq.most_common(30),
        "bigram_suggestions": bigram_suggestions[:5],
    }

async def seo_analyze_speed_factors(url: str) -> dict[str, Any]:
    """Analyze page weight and resource loading."""
    try:
        response = fetch_url(url, follow_redirects=True)
        html = response.text
    except Exception as e:
        return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}

    headers = response.headers
    html_size_bytes = len(response.content)
    html_size_kb = round(html_size_bytes / 1024, 2)

    # Count resources
    scripts = len(re.findall(r'<script[^>]*src\s*=', html, re.IGNORECASE))
    stylesheets = len(re.findall(
        r'<link[^>]*\brel\s*=\s*["\']stylesheet["\']', html, re.IGNORECASE
    ))
    images = len(re.findall(r'<img[^>]*src\s*=', html, re.IGNORECASE))
    # Inline scripts: <script> tags whose opening tag has no src attribute
    inline_scripts = len(re.findall(
        r'<script(?![^>]*\bsrc\s*=)[^>]*>', html, re.IGNORECASE
    ))
    inline_styles = len(re.findall(r'<style[^>]*>(.*?)</style>', html, re.IGNORECASE | re.DOTALL))
    # Preloaded web fonts (<link rel="preload" as="font" ...>)
    fonts = len(re.findall(r'<link[^>]*\bas\s*=\s*["\']font["\']', html, re.IGNORECASE))

    # Compression
    content_encoding = headers.get("content-encoding", "none")
    connection = headers.get("connection", "")
    keep_alive = headers.get("keep-alive", "")

    # Check if keep-alive is indicated
    is_keep_alive = False
    if keep_alive:
        is_keep_alive = True
    elif connection and connection.lower() == "keep-alive":
        is_keep_alive = True

    # HTTP version (httpx exposes this on the response)
    http_version = response.http_version if hasattr(response, 'http_version') else "unknown"

    return {
        "url": url,
        "html_size_bytes": html_size_bytes,
        "html_size_kb": html_size_kb,
        "resources": {
            "scripts_external": scripts,
            "scripts_inline": inline_scripts,
            "stylesheets_external": stylesheets,
            "stylesheets_inline": inline_styles,
            "images": images,
            "fonts": fonts,
            "total_resources": scripts + stylesheets + images + fonts,
        },
        "compression": {
            "content_encoding": content_encoding if content_encoding != "none" else None,
            "is_compressed": content_encoding != "none",
        },
        "connection": {
            "keep_alive": is_keep_alive,
            "connection_header": connection if connection else None,
            "keep_alive_header": keep_alive if keep_alive else None,
        },
        "http_version": http_version,
        "response_headers": {
            "content_type": headers.get("content-type"),
            "content_length": headers.get("content-length"),
        },
    }

# ─── Main entry point ──────────────────────────────────────────────────────

async def main():
    from mcp.server.stdio import stdio_server

    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="seo-audit-mcp",
                server_version="1.0.0",
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
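# Illustrative client sketch (not part of the server): one way to drive this
# server over stdio, assuming the standard `mcp` Python client API
# (StdioServerParameters, stdio_client, ClientSession). The helper name
# `_example_stdio_client` is hypothetical.
#
#   import asyncio
#   from mcp import ClientSession, StdioServerParameters
#   from mcp.client.stdio import stdio_client
#
#   async def _example_stdio_client() -> None:
#       params = StdioServerParameters(command="python3", args=["server.py"])
#       async with stdio_client(params) as (read_stream, write_stream):
#           async with ClientSession(read_stream, write_stream) as session:
#               await session.initialize()
#               result = await session.call_tool(
#                   "seo_analyze_url", {"url": "https://example.com"}
#               )
#               print(result)
#
#   asyncio.run(_example_stdio_client())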