agentpay-seo-audit-mcp 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +7 -0
  2. package/server.py +782 -0
package/package.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "name": "agentpay-seo-audit-mcp",
+   "version": "1.2.0",
+   "description": "MCP server with rate-limited free tier. 50 free calls, then Pro subscription at $19/mo.",
+   "type": "module",
+   "license": "MIT"
+ }
package/server.py ADDED
@@ -0,0 +1,782 @@
+ """
+ SEO Audit MCP Server
+ Provides AI agents with the ability to audit any URL's on-page SEO.
+
+ Usage:
+     python3 server.py                      # Free tier (50 calls/instance)
+     python3 server.py --pro-key PROL_XXX   # Pro tier (unlimited)
+
+ Tools:
+     - seo_analyze_url: Full on-page SEO audit
+     - seo_check_headers: HTTP headers audit
+     - seo_suggest_keywords: Extract keywords from page content
+     - seo_analyze_speed_factors: Page weight and resource analysis
+
+ Pricing: $19/mo — https://buy.stripe.com/dRm6oJ4Hd2Jugek0wz1oI0m
+ """
+
+ import json
+ import re
+ import socket  # used by check_ssl for the TLS handshake
+ import ssl as ssl_module
+ import sys
+ from datetime import datetime
+ from typing import Any, Optional
+ from urllib.parse import urlparse
+
+ import httpx
+ from mcp.server.models import InitializationOptions
+ import mcp.types as types
+ from mcp.server import NotificationOptions, Server
+ from pydantic import AnyUrl
+
+ server = Server("seo-audit-mcp")
+
+ # ─── Rate Limiting & Pro Key ───────────────────────────────────────────
+ FREE_LIMIT = 50
+ PRO_KEYS = {"PROL_AGENTPAY_DEMO": "demo"}  # Demo key for testing
+
+ # Parse --pro-key from command line
+ PRO_KEY = None
+ for i, arg in enumerate(sys.argv):
+     if arg == "--pro-key" and i + 1 < len(sys.argv):
+         PRO_KEY = sys.argv[i + 1]
+         break
+
+ IS_PRO = PRO_KEY in PRO_KEYS
+ call_counter = 0
+
+ STRIPE_LINK = "https://buy.stripe.com/dRm6oJ4Hd2Jugek0wz1oI0m"  # $19/mo
+
+ def check_rate_limit():
+     """Check if free tier has exceeded limit. Returns error dict or None."""
+     global call_counter
+     if IS_PRO:
+         return None
+     call_counter += 1
+     if call_counter > FREE_LIMIT:
+         remaining = call_counter - FREE_LIMIT
+         return {
+             "error": f"Free tier limit reached ({FREE_LIMIT} calls). Upgrade to Pro for unlimited access.",
+             "isError": True,
+             "next_steps": [
+                 f"Purchase Pro at {STRIPE_LINK} ($19/mo, unlimited)",
+                 "Restart the server to reset the free counter",
+                 "Use --pro-key PROL_XXX to run in Pro mode"
+             ],
+             "calls_used": call_counter,
+             "limit": FREE_LIMIT,
+             "over_by": remaining
+         }
+     return None
+
+
+ # ─── Helpers ───────────────────────────────────────────────────────────────
+
+ def fetch_url(url: str, follow_redirects: bool = True) -> httpx.Response:
+     """Fetch a URL with sensible defaults. Raises on network errors."""
+     with httpx.Client(
+         follow_redirects=follow_redirects,
+         timeout=30.0,
+         headers={
+             "User-Agent": (
+                 "Mozilla/5.0 (compatible; SEOAuditBot/1.0; "
+                 "+https://seo-audit-mcp.example.com)"
+             ),
+             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+             "Accept-Language": "en-US,en;q=0.5",
+         },
+     ) as client:
+         return client.get(url)
+
+
+ def extract_tag(html: str, tag: str) -> list[str]:
+     """Extract full <tag ...>content</tag> from HTML using regex."""
+     pattern = re.compile(
+         rf'<{tag}[^>]*>(.*?)</{tag}>', re.IGNORECASE | re.DOTALL
+     )
+     return pattern.findall(html)
+
+
+ def extract_attr(html: str, tag: str, attr: str) -> list[str]:
+     """Extract attribute values from specific tags."""
+     pattern = re.compile(
+         rf'<{tag}[^>]*\s{attr}\s*=\s*["\']([^"\']*)["\']',
+         re.IGNORECASE | re.DOTALL,
+     )
+     return pattern.findall(html)
+
+
+ def extract_meta_content(html: str, name_or_prop: str) -> Optional[str]:
+     """Extract content from a <meta> tag by name or property."""
+     # Try property first (Open Graph)
+     pattern = re.compile(
+         rf'<meta[^>]*(?:property|name)\s*=\s*["\']{re.escape(name_or_prop)}["\'][^>]*'
+         rf'\s+content\s*=\s*["\']([^"\']*)["\']',
+         re.IGNORECASE,
+     )
+     m = pattern.search(html)
+     if m:
+         return m.group(1)
+     # Try reversed attribute order
+     pattern2 = re.compile(
+         rf'<meta[^>]*\s+content\s*=\s*["\']([^"\']*)["\'][^>]*'
+         rf'(?:property|name)\s*=\s*["\']{re.escape(name_or_prop)}["\']',
+         re.IGNORECASE,
+     )
+     m2 = pattern2.search(html)
+     if m2:
+         return m2.group(1)
+     return None
+
+
+ def extract_all_meta(html: str) -> dict[str, str]:
+     """Extract all meta tags into a dict keyed by name/property."""
+     metas: dict[str, str] = {}
+     pattern = re.compile(
+         r'<meta[^>]*(?:name|property)\s*=\s*["\']([^"\']*)["\']'
+         r'[^>]*\s+content\s*=\s*["\']([^"\']*)["\']',
+         re.IGNORECASE,
+     )
+     for m in pattern.finditer(html):
+         metas[m.group(1).strip()] = m.group(2).strip()
+     # reversed order
+     pattern2 = re.compile(
+         r'<meta[^>]*\s+content\s*=\s*["\']([^"\']*)["\']'
+         r'[^>]*(?:name|property)\s*=\s*["\']([^"\']*)["\']',
+         re.IGNORECASE,
+     )
+     for m in pattern2.finditer(html):
+         metas[m.group(2).strip()] = m.group(1).strip()
+     return metas
+
+
+ def strip_html(html: str) -> str:
+     """Remove all HTML tags, returning only visible text."""
+     text = re.sub(r'<[^>]+>', ' ', html)
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+
+ def check_ssl(url: str) -> dict[str, Any]:
+     """Check SSL certificate validity for a URL."""
+     parsed = urlparse(url)
+     hostname = parsed.hostname or ""
+     port = parsed.port or 443
+     result: dict[str, Any] = {"valid": False, "issuer": None, "error": None}
+     try:
+         ctx = ssl_module.create_default_context()
+         # Open a TCP connection and wrap it for a real certificate check
+         with socket.create_connection((hostname, port), timeout=10) as raw_sock:
+             with ctx.wrap_socket(raw_sock, server_hostname=hostname) as tls_sock:
+                 cert = tls_sock.getpeercert() or {}
+                 issuer = dict(item[0] for item in cert.get("issuer", ()))
+                 result["valid"] = True
+                 result["issuer"] = issuer.get("organizationName") or issuer.get("commonName")
+     except Exception:
+         # Fall back to a simpler check via httpx
+         try:
+             with httpx.Client(verify=True, timeout=10.0) as client:
+                 client.get(f"https://{hostname}:{port}", headers={"User-Agent": "SEOAuditBot/1.0"})
+             result["valid"] = True
+             result["issuer"] = "verified (via httpx)"
+         except Exception as e2:
+             result["valid"] = False
+             result["error"] = str(e2)
+     return result
+
+
+ def status_for_length(length: int, min_val: int, max_val: int) -> str:
+     """Return 'good', 'fair', or 'poor' based on length bounds."""
+     if min_val <= length <= max_val:
+         return "good"
+     if length == 0:
+         return "poor"
+     return "fair"
+
+
+ def compute_score(issues: list[str]) -> int:
+     """Compute an SEO score out of 100 based on issues list."""
+     penalties = {
+         "Missing title tag": 20,
+         "Title tag too short": 10,
+         "Title tag too long": 10,
+         "Missing meta description": 20,
+         "Meta description too short": 5,
+         "Meta description too long": 5,
+         "No H1 heading": 15,
+         "Multiple H1 headings": 10,
+         "H1 too long": 5,
+         "Missing alt text on images": 5,
+         "No Open Graph title": 5,
+         "No Open Graph description": 3,
+         "No Open Graph image": 3,
+         "No viewport meta tag": 10,
+         "No canonical URL": 5,
+         "No structured data": 5,
+         "SSL certificate issue": 15,
+     }
+     score = 100
+     for issue in issues:
+         # Prefix match so detailed issue strings (e.g. alt-text counts) still map to their penalty.
+         penalty = next((p for key, p in penalties.items() if issue.startswith(key)), 2)
+         score -= penalty
+     return max(0, min(100, score))
+
+
+ # ─── Parse helpers ─────────────────────────────────────────────────────────
+
+ def parse_headings(html: str) -> dict[str, Any]:
+     """Analyze heading structure (H1-H6)."""
+     result: dict[str, Any] = {
+         "h1_count": 0, "h2_count": 0, "h3_count": 0,
+         "h4_count": 0, "h5_count": 0, "h6_count": 0,
+         "h1_contents": [], "issues": [],
+     }
+     for level in range(1, 7):
+         tag = f"h{level}"
+         contents = extract_tag(html, tag)
+         result[f"h{level}_count"] = len(contents)
+         if level == 1:
+             result["h1_contents"] = [strip_html(c)[:100] for c in contents]
+
+     # H1 issues
+     if result["h1_count"] == 0:
+         result["issues"].append("No H1 heading")
+     elif result["h1_count"] > 1:
+         result["issues"].append("Multiple H1 headings")
+     if result["h1_contents"]:
+         for h1 in result["h1_contents"]:
+             if len(h1) > 70:
+                 result["issues"].append("H1 too long")
+                 break
+     return result
+
+
+ def parse_images(html: str) -> dict[str, Any]:
+     """Analyze image tags for alt text coverage."""
+     img_tags = re.findall(r'<img[^>]*>', html, re.IGNORECASE)
+     total = len(img_tags)
+     missing_alt = 0
+     for img in img_tags:
+         if not re.search(r'\salt\s*=\s*["\']', img, re.IGNORECASE):
+             missing_alt += 1
+     return {"total": total, "missing_alt": missing_alt}
+
+
+ def parse_open_graph(html: str) -> dict[str, bool]:
+     """Check for Open Graph meta tags."""
+     og_title = extract_meta_content(html, "og:title") is not None
+     og_description = extract_meta_content(html, "og:description") is not None
+     og_image = extract_meta_content(html, "og:image") is not None
+     og_url = extract_meta_content(html, "og:url") is not None
+     og_type = extract_meta_content(html, "og:type") is not None
+     return {
+         "og_title": og_title,
+         "og_description": og_description,
+         "og_image": og_image,
+         "og_url": og_url,
+         "og_type": og_type,
+     }
+
+
+ def parse_title(html: str) -> dict[str, Any]:
+     """Extract and analyze the <title> tag."""
+     titles = extract_tag(html, "title")
+     if not titles:
+         return {"content": "", "length": 0, "status": "poor"}
+     content = strip_html(titles[0])
+     length = len(content)
+     if length == 0:
+         return {"content": "", "length": 0, "status": "poor"}
+     status = status_for_length(length, 30, 60)
+     return {"content": content, "length": length, "status": status}
+
+
+ def parse_meta_description(html: str) -> dict[str, Any]:
+     """Extract and analyze the meta description."""
+     content = extract_meta_content(html, "description")
+     if not content:
+         return {"content": "", "length": 0, "status": "poor"}
+     length = len(content)
+     status = status_for_length(length, 120, 158)
+     return {"content": content, "length": length, "status": status}
+
+
+ def parse_structured_data(html: str) -> dict[str, Any]:
+     """Detect structured data (JSON-LD, Microdata, RDFa)."""
+     # JSON-LD
+     jsonld = bool(re.search(
+         r'<script[^>]*type\s*=\s*["\']application/ld\+json["\']',
+         html, re.IGNORECASE,
+     ))
+     # Microdata (itemscope/itemprop)
+     microdata = bool(re.search(r'\bitemscope\b', html, re.IGNORECASE))
+     # RDFa
+     rdfa = bool(re.search(r'\btypeof\s*=\s*["\']', html, re.IGNORECASE))
+     return {
+         "json_ld": jsonld,
+         "microdata": microdata,
+         "rdfa": rdfa,
+         "present": jsonld or microdata or rdfa,
+     }
+
+
+ def parse_hreflang(html: str) -> list[dict[str, str]]:
+     """Extract hreflang tags."""
+     tags = []
+     pattern = re.compile(
+         r'<link[^>]*\brel\s*=\s*["\']alternate["\'][^>]*\bhreflang\s*=\s*["\']([^"\']*)["\']',
+         re.IGNORECASE,
+     )
+     for m in pattern.finditer(html):
+         link_tag = m.group(0)
+         href_m = re.search(r'\bhref\s*=\s*["\']([^"\']*)["\']', link_tag, re.IGNORECASE)
+         href = href_m.group(1) if href_m else ""
+         tags.append({"hreflang": m.group(1), "href": href})
+     return tags
+
+
+ # ─── MCP Tools ─────────────────────────────────────────────────────────────
+
+ @server.list_tools()
+ async def handle_list_tools() -> list[types.Tool]:
+     return [
+         types.Tool(
+             name="seo_analyze_url",
+             description=(
+                 "Full on-page SEO audit of any URL. Analyzes title tag, meta "
+                 "description, headings (H1-H6), image alt texts, word count, "
+                 "SSL status, mobile-friendly viewport tag, Open Graph tags, "
+                 "canonical URL, hreflang tags, robots meta, and structured "
+                 "data presence. Returns a comprehensive JSON report with an "
+                 "SEO score out of 100."
+             ),
+             inputSchema={
+                 "type": "object",
+                 "required": ["url"],
+                 "properties": {
+                     "url": {
+                         "type": "string",
+                         "description": "The URL to audit (e.g., https://example.com)",
+                     }
+                 },
+             },
+         ),
+         types.Tool(
+             name="seo_check_headers",
+             description=(
+                 "HTTP headers audit of a URL. Checks status code, content-type, "
+                 "x-robots-tag, link rel=canonical, cache-control, server, "
+                 "last-modified headers."
+             ),
+             inputSchema={
+                 "type": "object",
+                 "required": ["url"],
+                 "properties": {
+                     "url": {
+                         "type": "string",
+                         "description": "The URL to check headers for",
+                     }
+                 },
+             },
+         ),
+         types.Tool(
+             name="seo_suggest_keywords",
+             description=(
+                 "Extract keyword suggestions from page content. Analyzes most "
+                 "frequent words, words in headings, and words in the title. "
+                 "Suggests primary and secondary keywords."
+             ),
+             inputSchema={
+                 "type": "object",
+                 "required": ["url"],
+                 "properties": {
+                     "url": {
+                         "type": "string",
+                         "description": "The URL to extract keywords from",
+                     },
+                     "count": {
+                         "type": "integer",
+                         "description": "Number of keyword suggestions (default: 10)",
+                         "default": 10,
+                     },
+                 },
+             },
+         ),
+         types.Tool(
+             name="seo_analyze_speed_factors",
+             description=(
+                 "Analyze page weight and resource loading. Measures total HTML "
+                 "size, number of resources (scripts, stylesheets, images), "
+                 "compression (gzip/brotli), and keep-alive connection status."
+             ),
+             inputSchema={
+                 "type": "object",
+                 "required": ["url"],
+                 "properties": {
+                     "url": {
+                         "type": "string",
+                         "description": "The URL to analyze speed factors for",
+                     }
+                 },
+             },
+         ),
+     ]
+
+
+ @server.call_tool()
+ async def handle_call_tool(
+     name: str, arguments: dict | None
+ ) -> list[types.TextContent]:
+     if arguments is None:
+         arguments = {}
+
+     # Rate limit check
+     limit_check = check_rate_limit()
+     if limit_check:
+         return [types.TextContent(type="text", text=json.dumps(limit_check, indent=2))]
+
+     url = arguments.get("url", "")
+
+     if name == "seo_analyze_url":
+         return [types.TextContent(type="text", text=json.dumps(
+             await seo_analyze_url(url), indent=2
+         ))]
+     elif name == "seo_check_headers":
+         return [types.TextContent(type="text", text=json.dumps(
+             await seo_check_headers(url), indent=2
+         ))]
+     elif name == "seo_suggest_keywords":
+         count = arguments.get("count", 10)
+         return [types.TextContent(type="text", text=json.dumps(
+             await seo_suggest_keywords(url, count), indent=2
+         ))]
+     elif name == "seo_analyze_speed_factors":
+         return [types.TextContent(type="text", text=json.dumps(
+             await seo_analyze_speed_factors(url), indent=2
+         ))]
+     else:
+         raise ValueError(f"Unknown tool: {name}")
+
+
+ async def seo_analyze_url(url: str) -> dict[str, Any]:
+     """Full on-page SEO audit."""
+     try:
+         response = fetch_url(url)
+         html = response.text
+     except Exception as e:
+         return {"url": url, "error": f"Failed to fetch URL: {str(e)}", "score": 0}
+
+     issues: list[str] = []
+
+     # Title
+     title_info = parse_title(html)
+     if title_info["status"] == "poor" and title_info["length"] == 0:
+         issues.append("Missing title tag")
+     elif title_info["status"] == "poor":
+         issues.append("Title tag too short")
+     elif title_info["status"] == "fair":
+         issues.append("Title tag too long" if title_info["length"] > 60 else "Title tag too short")
+
+     # Meta description
+     meta_desc = parse_meta_description(html)
+     if meta_desc["status"] == "poor" and meta_desc["length"] == 0:
+         issues.append("Missing meta description")
+     elif meta_desc["status"] == "poor":
+         issues.append("Meta description too short")
+     elif meta_desc["status"] == "fair":
+         issues.append("Meta description too long" if meta_desc["length"] > 158 else "Meta description too short")
+
+     # Headings
+     headings_info = parse_headings(html)
+     issues.extend(headings_info.get("issues", []))
+
+     # Images
+     images_info = parse_images(html)
+     if images_info["missing_alt"] > 0:
+         issues.append(f"Missing alt text on images ({images_info['missing_alt']} of {images_info['total']})")
+
+     # SSL
+     ssl_info = check_ssl(url)
+     if not ssl_info["valid"]:
+         issues.append("SSL certificate issue")
+
+     # Viewport (mobile-friendly)
+     viewport = extract_meta_content(html, "viewport")
+     if not viewport:
+         issues.append("No viewport meta tag")
+
+     # Open Graph
+     og_info = parse_open_graph(html)
+     if not og_info["og_title"]:
+         issues.append("No Open Graph title")
+     if not og_info["og_description"]:
+         issues.append("No Open Graph description")
+     if not og_info["og_image"]:
+         issues.append("No Open Graph image")
+
+     # Canonical URL
+     canonical_href = None
+     link_pattern = re.compile(
+         r'<link[^>]*\brel\s*=\s*["\']canonical["\'][^>]*>',
+         re.IGNORECASE,
+     )
+     for link_tag in link_pattern.finditer(html):
+         href_m = re.search(r'\bhref\s*=\s*["\']([^"\']*)["\']', link_tag.group(0), re.IGNORECASE)
+         if href_m:
+             canonical_href = href_m.group(1)
+             break
+     if not canonical_href:
+         issues.append("No canonical URL")
+
+     # Hreflang
+     hreflang_tags = parse_hreflang(html)
+
+     # Robots meta
+     robots = extract_meta_content(html, "robots")
+
+     # Structured data
+     sd_info = parse_structured_data(html)
+     if not sd_info["present"]:
+         issues.append("No structured data")
+
+     # Word count
+     body_text = ""
+     body_m = re.search(r'<body[^>]*>(.*)</body>', html, re.IGNORECASE | re.DOTALL)
+     if body_m:
+         body_text = strip_html(body_m.group(1))
+     else:
+         body_text = strip_html(html)
+     word_count = len(body_text.split())
+
+     # Score
+     score = compute_score(issues)
+
+     return {
+         "url": url,
+         "title": title_info,
+         "meta_description": meta_desc,
+         "headings": headings_info,
+         "images": images_info,
+         "ssl": ssl_info,
+         "viewport": viewport if viewport else None,
+         "open_graph": og_info,
+         "canonical_url": canonical_href,
+         "hreflang_tags": hreflang_tags,
+         "robots_meta": robots,
+         "structured_data": sd_info,
+         "word_count": word_count,
+         "issues": issues,
+         "score": score,
+     }
+
+
+ async def seo_check_headers(url: str) -> dict[str, Any]:
+     """HTTP headers audit."""
+     try:
+         # Don't follow redirects to capture the first response
+         response = fetch_url(url, follow_redirects=False)
+     except Exception as e:
+         return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}
+
+     headers = response.headers
+     # Extract link header for canonical
+     link_header = headers.get("link", "")
+     canonical_from_link = None
+     if link_header:
+         cm = re.search(r'<([^>]+)>\s*;\s*rel\s*=\s*["\']?canonical["\']?', link_header, re.IGNORECASE)
+         if cm:
+             canonical_from_link = cm.group(1)
+
+     return {
+         "url": str(response.url),
+         "status_code": response.status_code,
+         "content_type": headers.get("content-type"),
+         "x_robots_tag": headers.get("x-robots-tag"),
+         "link_canonical": canonical_from_link,
+         "cache_control": headers.get("cache-control"),
+         "server": headers.get("server"),
+         "last_modified": headers.get("last-modified"),
+         "content_length": headers.get("content-length"),
+         "all_headers": dict(headers),
+     }
+
+
+ async def seo_suggest_keywords(url: str, count: int = 10) -> dict[str, Any]:
+     """Extract keyword suggestions from page content."""
+     try:
+         response = fetch_url(url)
+         html = response.text
+     except Exception as e:
+         return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}
+
+     # Extract text
+     body_m = re.search(r'<body[^>]*>(.*)</body>', html, re.IGNORECASE | re.DOTALL)
+     body_html = body_m.group(1) if body_m else html
+     body_text = strip_html(body_html)
+
+     # Title text
+     title_text = ""
+     titles = extract_tag(html, "title")
+     if titles:
+         title_text = strip_html(titles[0])
+
+     # Heading texts
+     heading_text = ""
+     for level in range(1, 7):
+         for h in extract_tag(html, f"h{level}"):
+             heading_text += " " + strip_html(h)
+
+     # Tokenize (simple: lowercase, keep alphabetic words >= 3 chars)
+     def tokenize(text: str) -> list[str]:
+         words = re.findall(r'[a-zA-Z]{3,}', text.lower())
+         # Filter common stop words
+         stop_words = {
+             "the", "and", "for", "are", "but", "not", "you", "all", "can",
+             "had", "her", "was", "one", "our", "out", "has", "have", "been",
+             "some", "same", "also", "just", "than", "that", "this", "with",
+             "from", "they", "their", "them", "would", "could", "should",
+             "about", "into", "over", "after", "what", "when", "where", "which",
+             "will", "were", "being", "does", "more", "most", "other",
+             "such", "here", "there", "each", "like", "very", "your",
+         }
+         return [w for w in words if w not in stop_words]
+
+     body_words = tokenize(body_text)
+     title_words = tokenize(title_text)
+     heading_words = tokenize(heading_text)
+
+     # Count frequencies
+     from collections import Counter
+     body_freq = Counter(body_words)
+     heading_freq = Counter(heading_words)
+     title_freq = Counter(title_words)
+
+     # Suggest primary keywords (top from headings + title)
+     primary_candidates: list[str] = []
+     seen: set[str] = set()
+     for word, _ in heading_freq.most_common(20):
+         if word not in seen:
+             primary_candidates.append(word)
+             seen.add(word)
+     for word, _ in title_freq.most_common(20):
+         if word not in seen:
+             primary_candidates.append(word)
+             seen.add(word)
+
+     # Secondary keywords (top from body minus already seen)
+     secondary_candidates: list[str] = []
+     for word, _ in body_freq.most_common(50):
+         if word not in seen:
+             secondary_candidates.append(word)
+             seen.add(word)
+
+     # Bigrams as additional suggestions (for secondary)
+     bigrams = re.findall(r'(?=(\b[a-zA-Z]{3,}\s+[a-zA-Z]{3,}\b))', body_text.lower())
+     bigram_freq = Counter(bigrams)
+     bigram_suggestions = [bg for bg, _ in bigram_freq.most_common(count) if bg not in seen]
+
+     return {
+         "url": url,
+         "word_count": len(body_words),
+         "primary_keywords": primary_candidates[:max(5, count // 2)],
+         "secondary_keywords": secondary_candidates[:count],
+         "title_words": title_freq.most_common(10),
+         "heading_words": heading_freq.most_common(15),
+         "top_body_words": body_freq.most_common(30),
+         "bigram_suggestions": bigram_suggestions[:5],
+     }
+
+
+ async def seo_analyze_speed_factors(url: str) -> dict[str, Any]:
+     """Analyze page weight and resource loading."""
+     try:
+         response = fetch_url(url, follow_redirects=True)
+         html = response.text
+     except Exception as e:
+         return {"url": url, "error": f"Failed to fetch URL: {str(e)}"}
+
+     headers = response.headers
+     html_size_bytes = len(response.content)  # decoded body size in bytes
+     html_size_kb = round(html_size_bytes / 1024, 2)
+
+     # Count resources
+     scripts = len(re.findall(r'<script[^>]*src\s*=', html, re.IGNORECASE))
+     stylesheets = len(re.findall(
+         r'<link[^>]*\brel\s*=\s*["\']stylesheet["\']', html, re.IGNORECASE
+     ))
+     images = len(re.findall(r'<img[^>]*src\s*=', html, re.IGNORECASE))
+     inline_scripts = len(re.findall(
+         r'<script(?![^>]*\bsrc\s*=)[^>]*>(.*?)</script>', html, re.IGNORECASE | re.DOTALL
+     ))
+     inline_styles = len(re.findall(r'<style[^>]*>(.*?)</style>', html, re.IGNORECASE | re.DOTALL))
+     fonts = len(re.findall(r'<link[^>]*\bas\s*=\s*["\']font["\']', html, re.IGNORECASE))  # font preloads
+
+     # Compression
+     content_encoding = headers.get("content-encoding", "none")
+     transfer_encoding = headers.get("transfer-encoding", "")
+     connection = headers.get("connection", "")
+     keep_alive = headers.get("keep-alive", "")
+
+     # Check if keep-alive is indicated
+     is_keep_alive = False
+     if keep_alive:
+         is_keep_alive = True
+     elif connection and connection.lower() == "keep-alive":
+         is_keep_alive = True
+
+     # HTTP version (httpx doesn't expose this directly, but we can infer)
+     http_version = response.http_version if hasattr(response, 'http_version') else "unknown"
+
+     return {
+         "url": url,
+         "html_size_bytes": html_size_bytes,
+         "html_size_kb": html_size_kb,
+         "resources": {
+             "scripts_external": scripts,
+             "scripts_inline": inline_scripts,
+             "stylesheets_external": stylesheets,
+             "stylesheets_inline": inline_styles,
+             "images": images,
+             "fonts": fonts,
+             "total_resources": scripts + stylesheets + images + fonts,
+         },
+         "compression": {
+             "content_encoding": content_encoding if content_encoding != "none" else None,
+             "is_compressed": content_encoding != "none",
+         },
+         "connection": {
+             "keep_alive": is_keep_alive,
+             "connection_header": connection if connection else None,
+             "keep_alive_header": keep_alive if keep_alive else None,
+         },
+         "http_version": http_version,
+         "response_headers": {
+             "content_type": headers.get("content-type"),
+             "content_length": headers.get("content-length"),
+         },
+     }
+
+
+ # ─── Main entry point ─────────────────────────────────────────────────────
+
+ async def main():
+     from mcp.server.stdio import stdio_server
+
+     async with stdio_server() as (read_stream, write_stream):
+         await server.run(
+             read_stream,
+             write_stream,
+             InitializationOptions(
+                 server_name="seo-audit-mcp",
+                 server_version="1.0.0",
+                 capabilities=server.get_capabilities(
+                     notification_options=NotificationOptions(),
+                     experimental_capabilities={},
+                 ),
+             ),
+         )
+
+
+ if __name__ == "__main__":
+     import asyncio
+     asyncio.run(main())
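
The docstring's usage lines correspond to a stdio MCP server launch. A minimal sketch of a client registration is shown below, assuming a Claude Desktop-style mcpServers config; the "seo-audit" label, the /path/to/server.py path, and the PROL_XXX key are illustrative placeholders, and the --pro-key arguments can be omitted for the free tier.

  {
    "mcpServers": {
      "seo-audit": {
        "command": "python3",
        "args": ["/path/to/server.py", "--pro-key", "PROL_XXX"]
      }
    }
  }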