bmad-plus 0.2.0 → 0.3.0

@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+"""
+SEO Fetch — Secure HTTP page fetcher for SEO analysis.
+
+Features:
+- SSRF protection (blocks private/loopback/reserved IPs)
+- Multi-UA support (standard, Googlebot, GPTBot, ClaudeBot)
+- Redirect chain tracking
+- Cookie handling
+- Configurable timeout
+
+Author: Laurent Rochetta
+License: MIT
+"""
+
+import argparse
+import ipaddress
+import json
+import socket
+import sys
+from typing import Optional
+from urllib.parse import urlparse
+
+try:
+    import requests
+except ImportError:
+    print("Error: requests library required. Install: pip install requests", file=sys.stderr)
+    sys.exit(1)
+
+
+# ── User-Agent Presets ──────────────────────────────────────────────
+
+USER_AGENTS = {
+    "default": (
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+        "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 BMADSEOEngine/2.0"
+    ),
+    "googlebot": (
+        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+    ),
+    "gptbot": (
+        "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; "
+        "+https://openai.com/gptbot)"
+    ),
+    "claudebot": (
+        "Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://www.anthropic.com/claudebot)"
+    ),
+    "mobile": (
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
+        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1"
+    ),
+}
+
+DEFAULT_HEADERS = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9,fr;q=0.8",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+    "Cache-Control": "no-cache",
+}
+
+
+# ── Security: SSRF Prevention ──────────────────────────────────────
+
+def is_safe_url(url: str) -> bool:
+    """Block requests to private, loopback, and reserved IP addresses."""
+    parsed = urlparse(url)
+    hostname = parsed.hostname
+
+    if not hostname:
+        return False
+
+    try:
+        resolved_ip = socket.gethostbyname(hostname)
+        ip = ipaddress.ip_address(resolved_ip)
+        if ip.is_private or ip.is_loopback or ip.is_reserved or ip.is_link_local:
+            return False
+    except (socket.gaierror, ValueError):
+        pass  # DNS failure handled by requests
+
+    return True
+
+
+# ── Core Fetcher ───────────────────────────────────────────────────
+
+def fetch_page(
+    url: str,
+    timeout: int = 30,
+    follow_redirects: bool = True,
+    max_redirects: int = 5,
+    user_agent: str = "default",
+) -> dict:
+    """
+    Fetch a web page with security checks and detailed response tracking.
+
+    Returns dict with: url, status_code, content, headers, redirect_chain,
+    content_length, response_time_ms, error
+    """
+    result = {
+        "url": url,
+        "final_url": None,
+        "status_code": None,
+        "content": None,
+        "headers": {},
+        "redirect_chain": [],
+        "content_length": 0,
+        "response_time_ms": 0,
+        "error": None,
+    }
+
+    # Normalize URL
+    parsed = urlparse(url)
+    if not parsed.scheme:
+        url = f"https://{url}"
+        parsed = urlparse(url)
+
+    if parsed.scheme not in ("http", "https"):
+        result["error"] = f"Invalid URL scheme: {parsed.scheme}"
+        return result
+
+    # SSRF check
+    if not is_safe_url(url):
+        resolved = "unknown"
+        try:
+            resolved = socket.gethostbyname(parsed.hostname)
+        except Exception:
+            pass
+        result["error"] = f"Blocked: URL resolves to private/internal IP ({resolved})"
+        return result
+
+    try:
+        session = requests.Session()
+        session.max_redirects = max_redirects
+
+        headers = dict(DEFAULT_HEADERS)
+        ua_string = USER_AGENTS.get(user_agent, user_agent)
+        headers["User-Agent"] = ua_string
+
+        import time
+        start = time.monotonic()
+
+        response = session.get(
+            url,
+            headers=headers,
+            timeout=timeout,
+            allow_redirects=follow_redirects,
+        )
+
+        elapsed_ms = round((time.monotonic() - start) * 1000)
+
+        result["final_url"] = response.url
+        result["status_code"] = response.status_code
+        result["content"] = response.text
+        result["headers"] = dict(response.headers)
+        result["content_length"] = len(response.content)
+        result["response_time_ms"] = elapsed_ms
+
+        if response.history:
+            result["redirect_chain"] = [
+                {"url": r.url, "status": r.status_code}
+                for r in response.history
+            ]
+
+    except requests.exceptions.Timeout:
+        result["error"] = f"Request timed out after {timeout}s"
+    except requests.exceptions.TooManyRedirects:
+        result["error"] = f"Too many redirects (max {max_redirects})"
+    except requests.exceptions.SSLError as e:
+        result["error"] = f"SSL error: {e}"
+    except requests.exceptions.ConnectionError as e:
+        result["error"] = f"Connection error: {e}"
+    except requests.exceptions.RequestException as e:
+        result["error"] = f"Request failed: {e}"
+
+    return result
+
+
+# ── CLI ────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="SEO Fetch — Secure HTTP fetcher for SEO analysis (BMAD+ SEO Engine)"
+    )
+    parser.add_argument("url", help="URL to fetch")
+    parser.add_argument("--output", "-o", help="Save HTML to file")
+    parser.add_argument("--timeout", "-t", type=int, default=30, help="Timeout in seconds")
+    parser.add_argument("--no-redirects", action="store_true", help="Don't follow redirects")
+    parser.add_argument(
+        "--ua", choices=list(USER_AGENTS.keys()), default="default",
+        help="User-Agent preset (default, googlebot, gptbot, claudebot, mobile)"
+    )
+    parser.add_argument("--json", "-j", action="store_true", help="Output full result as JSON")
+
+    args = parser.parse_args()
+
+    result = fetch_page(
+        args.url,
+        timeout=args.timeout,
+        follow_redirects=not args.no_redirects,
+        user_agent=args.ua,
+    )
+
+    if result["error"]:
+        print(f"Error: {result['error']}", file=sys.stderr)
+        sys.exit(1)
+
+    if args.json:
+        # Output metadata as JSON (without full HTML content for readability)
+        output = {k: v for k, v in result.items() if k != "content"}
+        output["content_preview"] = result["content"][:500] if result["content"] else None
+        print(json.dumps(output, indent=2))
+    elif args.output:
+        with open(args.output, "w", encoding="utf-8") as f:
+            f.write(result["content"])
+        print(f"Saved to {args.output}")
+    else:
+        print(result["content"])
+
+    # Metadata to stderr
+    print("\n--- Fetch Summary ---", file=sys.stderr)
+    print(f"Final URL: {result['final_url']}", file=sys.stderr)
+    print(f"Status: {result['status_code']}", file=sys.stderr)
+    print(f"Size: {result['content_length']:,} bytes", file=sys.stderr)
+    print(f"Time: {result['response_time_ms']}ms", file=sys.stderr)
+    if result["redirect_chain"]:
+        chain = " → ".join(r["url"] for r in result["redirect_chain"])
+        print(f"Redirects: {chain}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
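
Usage note: a minimal sketch of calling the fetcher programmatically, assuming the file above is importable as `seo_fetch` (the diff does not show file paths, so the module name is a guess):

    from seo_fetch import fetch_page  # hypothetical import path; adjust to the package's real layout

    result = fetch_page("https://example.com", timeout=10, user_agent="googlebot")
    if result["error"]:
        raise SystemExit(result["error"])
    print(result["status_code"], result["final_url"], f"{result['response_time_ms']}ms")
    for hop in result["redirect_chain"]:
        print("redirect:", hop["status"], "->", hop["url"])

The CLI wraps the same call; something like `python seo_fetch.py https://example.com --ua googlebot --json` (filename again assumed) prints the metadata with a 500-character content preview.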
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""
+SEO Parse — HTML parser for SEO element extraction.
+
+Extracts: title, meta tags, canonicals, headings, images, links (internal/external),
+schema (JSON-LD), Open Graph, Twitter Cards, hreflang, word count, text/code ratio.
+
+Author: Laurent Rochetta
+License: MIT
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+from typing import Optional
+from urllib.parse import urljoin, urlparse
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    print("Error: beautifulsoup4 required. Install: pip install beautifulsoup4", file=sys.stderr)
+    sys.exit(1)
+
+# Use lxml if available for speed, fallback to html.parser
+try:
+    import lxml  # noqa: F401
+    HTML_PARSER = "lxml"
+except ImportError:
+    HTML_PARSER = "html.parser"
+
+
+def parse_html(html: str, base_url: Optional[str] = None) -> dict:
+    """
+    Parse HTML and extract all SEO-relevant elements.
+
+    Args:
+        html: Raw HTML content
+        base_url: Base URL for resolving relative links
+
+    Returns:
+        Comprehensive dictionary of SEO data
+    """
+    soup = BeautifulSoup(html, HTML_PARSER)
+
+    result = {
+        "title": None,
+        "title_length": 0,
+        "meta_description": None,
+        "meta_description_length": 0,
+        "meta_robots": None,
+        "meta_viewport": None,
+        "canonical": None,
+        "headings": {"h1": [], "h2": [], "h3": [], "h4": []},
+        "images": [],
+        "links": {"internal": [], "external": [], "broken_candidates": []},
+        "schema_blocks": [],
+        "open_graph": {},
+        "twitter_card": {},
+        "hreflang": [],
+        "word_count": 0,
+        "html_size_bytes": len(html.encode("utf-8")),
+        "text_ratio": 0.0,
+        "has_lang_attr": False,
+        "lang": None,
+        "scripts_count": 0,
+        "stylesheets_count": 0,
+        "dom_depth_estimate": 0,
+        "security_headers_hints": {},
+    }
+
+    # ── Title ──
+    title_tag = soup.find("title")
+    if title_tag:
+        result["title"] = title_tag.get_text(strip=True)
+        result["title_length"] = len(result["title"])
+
+    # ── Meta Tags ──
+    for meta in soup.find_all("meta"):
+        name = (meta.get("name") or "").lower()
+        property_attr = (meta.get("property") or "").lower()
+        content = meta.get("content", "")
+
+        if name == "description":
+            result["meta_description"] = content
+            result["meta_description_length"] = len(content)
+        elif name == "robots":
+            result["meta_robots"] = content
+        elif name == "viewport":
+            result["meta_viewport"] = content
+
+        # Open Graph
+        if property_attr.startswith("og:"):
+            result["open_graph"][property_attr] = content
+
+        # Twitter Card
+        if name.startswith("twitter:"):
+            result["twitter_card"][name] = content
+
+    # ── Language ──
+    html_tag = soup.find("html")
+    if html_tag and html_tag.get("lang"):
+        result["has_lang_attr"] = True
+        result["lang"] = html_tag.get("lang")
+
+    # ── Canonical ──
+    canonical = soup.find("link", rel="canonical")
+    if canonical:
+        result["canonical"] = canonical.get("href")
+
+    # ── Hreflang ──
+    for link in soup.find_all("link", rel="alternate"):
+        hreflang = link.get("hreflang")
+        if hreflang:
+            result["hreflang"].append({
+                "lang": hreflang,
+                "href": link.get("href"),
+            })
+
+    # ── Headings ──
+    for level in ["h1", "h2", "h3", "h4"]:
+        for tag in soup.find_all(level):
+            text = tag.get_text(strip=True)
+            if text:
+                result["headings"][level].append(text)
+
+    # ── Images ──
+    for img in soup.find_all("img"):
+        src = img.get("src", "")
+        if base_url and src:
+            src = urljoin(base_url, src)
+
+        has_alt = img.get("alt") is not None
+        alt_text = img.get("alt", "")
+        has_dimensions = bool(img.get("width") and img.get("height"))
+
+        result["images"].append({
+            "src": src,
+            "alt": alt_text,
+            "has_alt": has_alt,
+            "alt_empty": has_alt and alt_text.strip() == "",
+            "width": img.get("width"),
+            "height": img.get("height"),
+            "has_dimensions": has_dimensions,
+            "loading": img.get("loading"),
+            "srcset": img.get("srcset") is not None,
+        })
+
+    # ── Links ──
+    if base_url:
+        base_domain = urlparse(base_url).netloc
+
+        for a in soup.find_all("a", href=True):
+            href = a.get("href", "")
+            if not href or href.startswith("#") or href.startswith("javascript:"):
+                continue
+
+            full_url = urljoin(base_url, href)
+            parsed = urlparse(full_url)
+
+            link_data = {
+                "href": full_url,
+                "text": a.get_text(strip=True)[:100],
+                "rel": a.get("rel", []),
+                "is_nofollow": "nofollow" in (a.get("rel") or []),
+                "target": a.get("target"),
+            }
+
+            if parsed.netloc == base_domain:
+                result["links"]["internal"].append(link_data)
+            else:
+                result["links"]["external"].append(link_data)
+
+    # ── Schema (JSON-LD) ──
+    for script in soup.find_all("script", type="application/ld+json"):
+        try:
+            schema_data = json.loads(script.string)
+            if isinstance(schema_data, dict):
+                result["schema_blocks"].append({
+                    "type": schema_data.get("@type", "unknown"),
+                    "data": schema_data,
+                })
+            elif isinstance(schema_data, list):
+                for item in schema_data:
+                    if isinstance(item, dict):
+                        result["schema_blocks"].append({
+                            "type": item.get("@type", "unknown"),
+                            "data": item,
+                        })
+        except (json.JSONDecodeError, TypeError):
+            result["schema_blocks"].append({"type": "PARSE_ERROR", "data": None})
+
+    # ── Resource Counts ──
+    result["scripts_count"] = len(soup.find_all("script"))
+    result["stylesheets_count"] = len(soup.find_all("link", rel="stylesheet"))
+
+    # ── Word Count & Text Ratio ──
+    text_soup = BeautifulSoup(html, HTML_PARSER)
+    for element in text_soup(["script", "style", "nav", "footer", "header", "noscript"]):
+        element.decompose()
+
+    visible_text = text_soup.get_text(separator=" ", strip=True)
+    words = re.findall(r"\b\w+\b", visible_text)
+    result["word_count"] = len(words)
+
+    text_bytes = len(visible_text.encode("utf-8"))
+    if result["html_size_bytes"] > 0:
+        result["text_ratio"] = round(text_bytes / result["html_size_bytes"], 3)
+
+    return result
+
+
+# ── CLI ────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="SEO Parse — HTML parser for SEO analysis (BMAD+ SEO Engine)"
+    )
+    parser.add_argument("file", nargs="?", help="HTML file to parse")
+    parser.add_argument("--url", "-u", help="Base URL for resolving relative links")
+    parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
+
+    args = parser.parse_args()
+
+    if args.file:
+        real_path = os.path.realpath(args.file)
+        if not os.path.isfile(real_path):
+            print(f"Error: File not found: {args.file}", file=sys.stderr)
+            sys.exit(1)
+        with open(real_path, "r", encoding="utf-8") as f:
+            html = f.read()
+    else:
+        html = sys.stdin.read()
+
+    result = parse_html(html, args.url)
+
+    if args.json:
+        print(json.dumps(result, indent=2, ensure_ascii=False))
+    else:
+        print(f"Title: {result['title']} ({result['title_length']} chars)")
+        print(f"Meta Description: {result['meta_description'][:80] + '...' if result['meta_description'] and len(result['meta_description']) > 80 else result['meta_description']}")
+        print(f"Canonical: {result['canonical']}")
+        print(f"Language: {result['lang']}")
+        print(f"H1: {len(result['headings']['h1'])} | H2: {len(result['headings']['h2'])} | H3: {len(result['headings']['h3'])}")
+        print(f"Images: {len(result['images'])} (missing alt: {sum(1 for i in result['images'] if not i['has_alt'])})")
+        print(f"Internal Links: {len(result['links']['internal'])} | External: {len(result['links']['external'])}")
+        print(f"Schema Blocks: {len(result['schema_blocks'])} ({', '.join(s['type'] for s in result['schema_blocks'])})")
+        print(f"Word Count: {result['word_count']:,}")
+        print(f"Text/HTML Ratio: {result['text_ratio']:.1%}")
+        print(f"Scripts: {result['scripts_count']} | Stylesheets: {result['stylesheets_count']}")
+
+
+if __name__ == "__main__":
+    main()
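
Usage note: a short sketch of driving the parser from Python, assuming the two files are importable as `seo_fetch` and `seo_parse` (module paths are not shown in this diff). The parser fetches nothing itself, so the HTML can come from the fetcher above or from disk:

    from seo_fetch import fetch_page   # hypothetical import paths;
    from seo_parse import parse_html   # adjust to the package's real layout

    page = fetch_page("https://example.com")
    if page["error"]:
        raise SystemExit(page["error"])

    seo = parse_html(page["content"], base_url=page["final_url"])
    print(seo["title"], f"({seo['title_length']} chars)")
    print("missing alt:", sum(1 for img in seo["images"] if not img["has_alt"]))
    print("internal links:", len(seo["links"]["internal"]))

Passing `base_url` matters: without it the internal/external link classification is skipped entirely and image `src` values are left unresolved.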
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+SEO Screenshot — Viewport screenshot capture for visual SEO analysis.
+
+Features:
+- Mobile and desktop viewport presets
+- Above-the-fold element detection
+- Full-page capture option
+- PNG output at the preset's device scale factor
+
+Requires: playwright (pip install playwright && playwright install chromium)
+
+Author: Laurent Rochetta
+License: MIT
+"""
+
+import argparse
+import sys
+
+
+VIEWPORTS = {
+    "mobile": {"width": 375, "height": 812, "device_scale_factor": 3, "is_mobile": True},
+    "tablet": {"width": 768, "height": 1024, "device_scale_factor": 2, "is_mobile": True},
+    "desktop": {"width": 1440, "height": 900, "device_scale_factor": 1, "is_mobile": False},
+    "desktop-hd": {"width": 1920, "height": 1080, "device_scale_factor": 1, "is_mobile": False},
+}
+
+
+def capture_screenshot(
+    url: str,
+    output: str = "screenshot.png",
+    viewport: str = "desktop",
+    full_page: bool = False,
+    wait_ms: int = 2000,
+):
+    """
+    Capture a viewport screenshot of a URL using Playwright.
+
+    Args:
+        url: URL to capture
+        output: Output file path (.png)
+        viewport: Viewport preset (mobile, tablet, desktop, desktop-hd)
+        full_page: Capture full page scroll or just viewport
+        wait_ms: Wait time after page load (ms)
+    """
+    try:
+        from playwright.sync_api import sync_playwright
+    except ImportError:
+        print(
+            "Error: playwright required.\n"
+            "Install: pip install playwright && playwright install chromium",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    vp = VIEWPORTS.get(viewport, VIEWPORTS["desktop"])
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        context = browser.new_context(
+            viewport={"width": vp["width"], "height": vp["height"]},
+            device_scale_factor=vp["device_scale_factor"],
+            is_mobile=vp["is_mobile"],
+            user_agent=(
+                "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
+                "AppleWebKit/605.1.15 Mobile/15E148 Safari/604.1"
+                if vp["is_mobile"]
+                else "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+                "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 BMADSEOEngine/2.0"
+            ),
+        )
+
+        page = context.new_page()
+
+        try:
+            page.goto(url, wait_until="networkidle", timeout=30000)
+        except Exception:
+            # Fallback: wait for load event instead
+            page.goto(url, wait_until="load", timeout=30000)
+
+        # Wait for dynamic content
+        page.wait_for_timeout(wait_ms)
+
+        # Capture screenshot
+        page.screenshot(path=output, full_page=full_page)
+
+        # Gather above-the-fold metrics
+        metrics = page.evaluate("""() => {
+            const viewportHeight = window.innerHeight;
+            const viewportWidth = window.innerWidth;
+
+            // Find CTAs above the fold
+            const ctas = [];
+            const buttons = document.querySelectorAll('a, button, [role="button"]');
+            buttons.forEach(el => {
+                const rect = el.getBoundingClientRect();
+                if (rect.top < viewportHeight && rect.bottom > 0) {
+                    const text = el.textContent.trim().substring(0, 50);
+                    if (text && (
+                        /sign.?up|get.?start|try|buy|contact|demo|free|download|subscribe/i.test(text)
+                    )) {
+                        ctas.push({
+                            text: text,
+                            tag: el.tagName,
+                            top: Math.round(rect.top),
+                            visible: rect.width > 0 && rect.height > 0,
+                        });
+                    }
+                }
+            });
+
+            // Find hero/LCP candidate
+            const images = document.querySelectorAll('img');
+            let largestImage = null;
+            let largestArea = 0;
+            images.forEach(img => {
+                const rect = img.getBoundingClientRect();
+                const area = rect.width * rect.height;
+                if (area > largestArea && rect.top < viewportHeight) {
+                    largestArea = area;
+                    largestImage = {
+                        src: img.src.substring(0, 100),
+                        width: Math.round(rect.width),
+                        height: Math.round(rect.height),
+                        top: Math.round(rect.top),
+                    };
+                }
+            });
+
+            // Check for horizontal scroll
+            const hasHorizontalScroll = document.documentElement.scrollWidth > viewportWidth;
+
+            // Font size check
+            const body = document.body;
+            const bodyFontSize = body ? parseFloat(getComputedStyle(body).fontSize) : 16;
+
+            return {
+                viewportWidth,
+                viewportHeight,
+                ctas_above_fold: ctas.length,
+                cta_details: ctas.slice(0, 5),
+                largest_image_above_fold: largestImage,
+                has_horizontal_scroll: hasHorizontalScroll,
+                body_font_size_px: bodyFontSize,
+                dom_element_count: document.querySelectorAll('*').length,
+            };
+        }""")
+
+        browser.close()
+
+    return metrics
+
+
+# ── CLI ────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="SEO Screenshot — Viewport capture (BMAD+ SEO Engine)"
+    )
+    parser.add_argument("url", help="URL to capture")
+    parser.add_argument("--output", "-o", default="screenshot.png", help="Output file path")
+    parser.add_argument(
+        "--viewport", "-v",
+        choices=list(VIEWPORTS.keys()), default="desktop",
+        help="Viewport preset"
+    )
+    parser.add_argument("--full", action="store_true", help="Capture full page (not just viewport)")
+    parser.add_argument("--wait", "-w", type=int, default=2000, help="Wait after load (ms)")
+    parser.add_argument("--json", "-j", action="store_true", help="Output metrics as JSON")
+
+    args = parser.parse_args()
+
+    import json
+
+    metrics = capture_screenshot(
+        url=args.url,
+        output=args.output,
+        viewport=args.viewport,
+        full_page=args.full,
+        wait_ms=args.wait,
+    )
+
+    print(f"Screenshot saved: {args.output}", file=sys.stderr)
+
+    if args.json:
+        print(json.dumps(metrics, indent=2))
+    else:
+        print(f"\nAbove-the-Fold Analysis ({args.viewport}):")
+        print(f"  Viewport: {metrics['viewportWidth']}×{metrics['viewportHeight']}")
+        print(f"  CTAs above fold: {metrics['ctas_above_fold']}")
+        for cta in metrics.get("cta_details", []):
+            print(f"  - \"{cta['text']}\" ({cta['tag']}, top: {cta['top']}px)")
+        if metrics.get("largest_image_above_fold"):
+            img = metrics["largest_image_above_fold"]
+            print(f"  Largest image: {img['width']}×{img['height']} at y={img['top']}px")
+        print(f"  Horizontal scroll: {'⚠️ YES' if metrics['has_horizontal_scroll'] else '✅ No'}")
+        print(f"  Body font size: {metrics['body_font_size_px']}px {'✅' if metrics['body_font_size_px'] >= 16 else '⚠️ <16px'}")
+        print(f"  DOM elements: {metrics['dom_element_count']:,}")
+
+
+if __name__ == "__main__":
+    main()
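
Usage note: a minimal sketch of the programmatic API, assuming the file is importable as `seo_screenshot` (path not shown in the diff) and that Playwright plus its Chromium build are installed:

    from seo_screenshot import capture_screenshot  # hypothetical import path

    metrics = capture_screenshot(
        "https://example.com",
        output="home-mobile.png",
        viewport="mobile",        # presets: mobile, tablet, desktop, desktop-hd
        full_page=False,
        wait_ms=1500,
    )
    print(metrics["ctas_above_fold"], "CTAs above the fold")
    print("horizontal scroll:", metrics["has_horizontal_scroll"])

The function writes the PNG to `output` and separately returns the above-the-fold metrics dict, so the image and the JSON analysis can be consumed independently.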