bmad-plus 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SEO Report — Professional HTML audit report generator.
4
+
5
+ Features:
6
+ - Single-file HTML with inline CSS (no external deps)
7
+ - SVG radar chart for score visualization
8
+ - Color-coded issue cards (Critical/High/Medium/Low)
9
+ - Quick Wins section
10
+ - Print-friendly (@media print)
11
+ - Responsive (mobile-readable)
12
+
13
+ Author: Laurent Rochetta
14
+ License: MIT
15
+ """
16
+
17
+ import argparse
18
+ import json
19
+ import math
20
+ import os
21
+ import sys
22
+ from datetime import datetime
23
+
24
+
25
def generate_radar_svg(scores: dict, size: int = 300) -> str:
    """Generate an SVG radar chart for the score categories.

    Args:
        scores: Mapping of category key -> score. Scores are clamped to the
            0-100 range; missing or non-numeric scores are plotted as 0.
        size: Width and height of the square SVG viewBox, in user units.

    Returns:
        The SVG markup as a string, or "" when ``scores`` is empty.
    """
    # Local import so the block stays self-contained in this file.
    from html import escape

    if not scores:
        return ""

    categories = list(scores.keys())
    n = len(categories)

    cx, cy = size // 2, size // 2
    radius = size // 2 - 40  # leave a 40-unit margin for the axis labels

    # Short labels for display
    short_labels = {
        "technical": "Tech",
        "content_eeat": "E-E-A-T",
        "on_page": "On-Page",
        "schema": "Schema",
        "performance": "Perf",
        "ai_readiness": "AI/GEO",
        "images": "Images",
    }

    def clamp(value):
        """Coerce a raw score to a float inside [0, 100]."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        if math.isnan(number):
            return 0.0
        # Without the lower bound a negative score would be drawn mirrored
        # through the centre of the chart.
        return max(0.0, min(number, 100.0))

    def point(angle_deg, r):
        # Subtract 90 degrees so the first axis points straight up.
        angle_rad = math.radians(angle_deg - 90)
        return cx + r * math.cos(angle_rad), cy + r * math.sin(angle_rad)

    values = [clamp(raw) for raw in scores.values()]
    angles = [(360 / n) * i for i in range(n)]
    vertices = [point(angle, radius * value / 100) for angle, value in zip(angles, values)]

    svg_parts = [f'<svg viewBox="0 0 {size} {size}" xmlns="http://www.w3.org/2000/svg" style="max-width:{size}px;margin:auto;display:block;">']

    # Background circles
    for pct in (25, 50, 75, 100):
        r = radius * pct / 100
        svg_parts.append(f'<circle cx="{cx}" cy="{cy}" r="{r}" fill="none" stroke="#e2e8f0" stroke-width="1" opacity="0.5"/>')

    # Axis lines + labels
    for name, angle in zip(categories, angles):
        x2, y2 = point(angle, radius)
        svg_parts.append(f'<line x1="{cx}" y1="{cy}" x2="{x2}" y2="{y2}" stroke="#e2e8f0" stroke-width="1"/>')

        lx, ly = point(angle, radius + 20)
        # Truncate unknown keys first, then escape, so an entity is never cut.
        label = escape(short_labels.get(name, str(name)[:6]))
        svg_parts.append(f'<text x="{lx}" y="{ly}" text-anchor="middle" font-size="11" fill="#64748b" font-family="Inter,sans-serif">{label}</text>')

    # Data polygon
    poly = " ".join(f"{x},{y}" for x, y in vertices)
    svg_parts.append(f'<polygon points="{poly}" fill="rgba(59,130,246,0.2)" stroke="#3b82f6" stroke-width="2"/>')

    # Data points, coloured by score band
    for (x, y), value in zip(vertices, values):
        color = "#22c55e" if value >= 80 else "#f59e0b" if value >= 50 else "#ef4444"
        svg_parts.append(f'<circle cx="{x}" cy="{y}" r="4" fill="{color}" stroke="white" stroke-width="2"/>')

    svg_parts.append('</svg>')
    return "\n".join(svg_parts)
93
+
94
+
95
def severity_color(severity: str) -> str:
    """Return the hex colour used to display a severity level.

    The lookup ignores case and surrounding whitespace, so "High" and
    " critical " resolve like their lowercase forms. Unknown or non-string
    values get the neutral grey "#64748b".
    """
    key = severity.strip().lower() if isinstance(severity, str) else ""
    return {
        "critical": "#ef4444",
        "high": "#f97316",
        "medium": "#f59e0b",
        "low": "#22c55e",
    }.get(key, "#64748b")
103
+
104
+
105
def severity_icon(severity: str) -> str:
    """Return the emoji marker used to display a severity level.

    The lookup ignores case and surrounding whitespace. Unknown or
    non-string values get the neutral white circle.
    """
    key = severity.strip().lower() if isinstance(severity, str) else ""
    return {
        "critical": "🔴",
        "high": "🟠",
        "medium": "🟡",
        "low": "🟢",
    }.get(key, "⚪")
112
+
113
+
114
def score_color(score: int) -> str:
    """Return the hex colour for a 0-100 score.

    Bands: >= 90 green, >= 70 lime, >= 50 amber, otherwise red. Integers,
    floats and numeric strings are accepted. A missing or non-numeric score
    (for example JSON ``null``) yields the neutral grey "#64748b" instead of
    raising.
    """
    try:
        value = float(score)
    except (TypeError, ValueError):
        return "#64748b"
    if math.isnan(value):
        return "#64748b"

    if value >= 90:
        return "#22c55e"
    if value >= 70:
        return "#84cc16"
    if value >= 50:
        return "#f59e0b"
    return "#ef4444"
123
+
124
+
125
def generate_html_report(audit_data: dict) -> str:
    """Generate a complete HTML report from audit JSON data.

    Every value taken from ``audit_data`` is HTML-escaped before it is
    placed in the page, so fix snippets that contain markup are displayed
    as text rather than rendered.

    Args:
        audit_data: Parsed audit JSON. Keys read here: "domain",
            "timestamp", "score" (with "total" and "categories") and
            "issues" (dicts with "severity", "category", "title",
            "description", "fix", "quick_win"). Missing or ``null`` entries
            fall back to empty/zero defaults.

    Returns:
        The full HTML document as a string.
    """
    # Local import so the block stays self-contained in this file.
    from html import escape

    def esc(value) -> str:
        """Escape any value for use in HTML text or attribute context."""
        return escape("" if value is None else str(value), quote=True)

    def numeric(value):
        """Return ``value`` when it is a number, else 0 (missing/bad score)."""
        return value if isinstance(value, (int, float)) else 0

    def severity_of(issue, default="low"):
        """Normalised (lowercase, stripped) severity of an issue dict."""
        return str(issue.get("severity") or default).strip().lower()

    domain = esc(audit_data.get("domain") or "Unknown")
    # Only the date part (first 10 characters of an ISO timestamp) is shown.
    report_date = esc(str(audit_data.get("timestamp") or datetime.now().isoformat())[:10])

    score_block = audit_data.get("score") or {}
    if not isinstance(score_block, dict):
        score_block = {}
    total_score = numeric(score_block.get("total", 0))
    raw_categories = score_block.get("categories") or {}
    if not isinstance(raw_categories, dict):
        raw_categories = {}
    categories = {str(name): numeric(value) for name, value in raw_categories.items()}

    raw_issues = audit_data.get("issues") or []
    issues = [item for item in raw_issues if isinstance(item, dict)]

    # Generate radar chart
    radar_svg = generate_radar_svg(categories) if categories else ""

    # Sort issues by severity (unknown severities go last)
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    sorted_issues = sorted(issues, key=lambda item: severity_order.get(severity_of(item), 4))

    # Count by severity
    counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
    for issue in issues:
        sev = severity_of(issue)
        counts[sev] = counts.get(sev, 0) + 1

    # Quick wins: the first five flagged issues, in input order
    quick_wins = [item for item in issues if item.get("quick_win", False)][:5]

    # Build issue cards HTML
    card_parts = []
    for issue in sorted_issues:
        sev = severity_of(issue)
        sev_color = severity_color(sev)
        fix_html = ""
        if issue.get("fix"):
            fix_html = f'<div class="fix-block"><strong>Fix:</strong><pre><code>{esc(issue["fix"])}</code></pre></div>'

        card_parts.append(f'''
        <div class="issue-card" style="border-left: 4px solid {sev_color}">
            <div class="issue-header">
                <span class="severity-badge" style="background:{sev_color}">{esc(sev.upper())}</span>
                <span class="issue-category">{esc(issue.get("category", ""))}</span>
            </div>
            <h4>{esc(issue.get("title", ""))}</h4>
            <p>{esc(issue.get("description", ""))}</p>
            {fix_html}
        </div>''')
    issue_cards = "".join(card_parts)

    # Quick wins HTML
    qw_html = ""
    if quick_wins:
        qw_items = "".join(
            f'<li>{severity_icon(severity_of(qw, default=""))} {esc(qw.get("title", ""))}</li>'
            for qw in quick_wins
        )
        qw_html = f'<div class="quick-wins"><h3>⚡ Quick Wins</h3><ul>{qw_items}</ul></div>'

    # Category scores table
    row_parts = []
    for cat, score in categories.items():
        cat_name = esc(cat.replace("_", " ").title())
        bar_color = score_color(score)
        # Keep the bar inside its track even for out-of-range scores.
        bar_width = max(0, min(score, 100))
        row_parts.append(f'''
        <tr>
            <td>{cat_name}</td>
            <td>
                <div class="score-bar-bg">
                    <div class="score-bar" style="width:{bar_width}%;background:{bar_color}"></div>
                </div>
            </td>
            <td style="color:{bar_color};font-weight:700">{esc(score)}</td>
        </tr>''')
    cat_rows = "".join(row_parts)

    total_color = score_color(total_score)

    document = f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SEO Audit Report — {domain}</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
    font-family: 'Inter', -apple-system, sans-serif;
    background: #f8fafc;
    color: #1e293b;
    line-height: 1.6;
}}
.container {{ max-width: 900px; margin: 0 auto; padding: 2rem; }}

/* Header */
.header {{
    background: linear-gradient(135deg, #0f172a 0%, #1e3a5f 100%);
    color: white;
    padding: 3rem 2rem;
    border-radius: 16px;
    margin-bottom: 2rem;
    text-align: center;
}}
.header h1 {{ font-size: 2rem; margin-bottom: 0.5rem; }}
.header .domain {{ font-size: 1.2rem; opacity: 0.8; }}
.header .date {{ font-size: 0.85rem; opacity: 0.6; margin-top: 0.5rem; }}

/* Score circle */
.score-hero {{
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 3rem;
    margin: 2rem 0;
    flex-wrap: wrap;
}}
.score-circle {{
    width: 150px;
    height: 150px;
    border-radius: 50%;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    border: 6px solid {total_color};
    background: white;
    box-shadow: 0 4px 24px rgba(0,0,0,0.08);
}}
.score-number {{ font-size: 3rem; font-weight: 700; color: {total_color}; }}
.score-label {{ font-size: 0.75rem; text-transform: uppercase; color: #64748b; letter-spacing: 1px; }}

/* Summary cards */
.summary-grid {{
    display: grid;
    grid-template-columns: repeat(4, 1fr);
    gap: 1rem;
    margin-bottom: 2rem;
}}
.summary-card {{
    background: white;
    border-radius: 12px;
    padding: 1.2rem;
    text-align: center;
    box-shadow: 0 2px 8px rgba(0,0,0,0.04);
}}
.summary-card .count {{ font-size: 2rem; font-weight: 700; }}
.summary-card .label {{ font-size: 0.8rem; color: #64748b; }}

/* Sections */
.section {{ background: white; border-radius: 12px; padding: 2rem; margin-bottom: 1.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.04); }}
.section h2 {{ margin-bottom: 1rem; font-size: 1.3rem; }}
.section h3 {{ margin-bottom: 0.8rem; font-size: 1.1rem; }}

/* Score bars */
table {{ width: 100%; border-collapse: collapse; }}
td {{ padding: 0.6rem 0; }}
.score-bar-bg {{ width: 100%; height: 8px; background: #e2e8f0; border-radius: 4px; overflow: hidden; margin: 0 1rem; }}
.score-bar {{ height: 100%; border-radius: 4px; transition: width 0.5s ease; }}

/* Issue cards */
.issue-card {{
    border: 1px solid #e2e8f0;
    border-radius: 8px;
    padding: 1rem;
    margin-bottom: 0.8rem;
}}
.issue-header {{ display: flex; gap: 0.5rem; margin-bottom: 0.3rem; align-items: center; }}
.severity-badge {{ color: white; padding: 2px 8px; border-radius: 4px; font-size: 0.7rem; font-weight: 600; }}
.issue-category {{ font-size: 0.8rem; color: #64748b; }}
.issue-card h4 {{ margin-bottom: 0.3rem; }}
.issue-card p {{ color: #475569; font-size: 0.9rem; }}
.fix-block {{ background: #f1f5f9; border-radius: 6px; padding: 0.8rem; margin-top: 0.5rem; }}
.fix-block pre {{ overflow-x: auto; font-size: 0.8rem; }}

/* Quick wins */
.quick-wins {{ background: #f0fdf4; border: 1px solid #bbf7d0; border-radius: 12px; padding: 1.5rem; margin-bottom: 1.5rem; }}
.quick-wins ul {{ list-style: none; padding: 0; }}
.quick-wins li {{ padding: 0.3rem 0; }}

/* Footer */
.footer {{ text-align: center; color: #94a3b8; font-size: 0.8rem; padding: 2rem 0; }}

/* Print */
@media print {{
    body {{ background: white; }}
    .container {{ max-width: 100%; padding: 0; }}
    .header {{ break-after: avoid; }}
    .section {{ break-inside: avoid; box-shadow: none; border: 1px solid #e2e8f0; }}
}}

/* Mobile */
@media (max-width: 640px) {{
    .summary-grid {{ grid-template-columns: repeat(2, 1fr); }}
    .score-hero {{ flex-direction: column; gap: 1.5rem; }}
}}
</style>
</head>
<body>
<div class="container">
    <div class="header">
        <h1>SEO Audit Report</h1>
        <div class="domain">{domain}</div>
        <div class="date">{report_date}</div>
    </div>

    <div class="score-hero">
        <div class="score-circle">
            <div class="score-number">{esc(total_score)}</div>
            <div class="score-label">SEO Score</div>
        </div>
        <div>
            {radar_svg}
        </div>
    </div>

    <div class="summary-grid">
        <div class="summary-card">
            <div class="count" style="color:#ef4444">{counts["critical"]}</div>
            <div class="label">Critical</div>
        </div>
        <div class="summary-card">
            <div class="count" style="color:#f97316">{counts["high"]}</div>
            <div class="label">High</div>
        </div>
        <div class="summary-card">
            <div class="count" style="color:#f59e0b">{counts["medium"]}</div>
            <div class="label">Medium</div>
        </div>
        <div class="summary-card">
            <div class="count" style="color:#22c55e">{counts["low"]}</div>
            <div class="label">Low</div>
        </div>
    </div>

    {qw_html}

    <div class="section">
        <h2>📊 Category Scores</h2>
        <table>{cat_rows}</table>
    </div>

    <div class="section">
        <h2>🔍 Issues ({len(issues)})</h2>
        {issue_cards}
    </div>

    <div class="footer">
        Generated by BMAD+ SEO Engine v2.1 — By Laurent Rochetta
    </div>
</div>
</body>
</html>'''

    return document
371
+
372
+
373
+ # ── CLI ────────────────────────────────────────────────────────────
374
+
375
def main():
    """Command-line entry point: read an audit JSON file, write the HTML report.

    Exits with status 1 and a one-line message on stderr when the input
    file is missing, is not valid JSON, does not contain a JSON object at
    the top level, or when the output file cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="SEO Report — HTML audit report generator (BMAD+ SEO Engine)"
    )
    parser.add_argument("input", help="Audit JSON file")
    parser.add_argument("--output", "-o", default="seo-report.html", help="Output HTML file")

    args = parser.parse_args()

    if not os.path.isfile(args.input):
        print(f"Error: File not found: {args.input}", file=sys.stderr)
        sys.exit(1)

    try:
        with open(args.input, "r", encoding="utf-8") as f:
            audit_data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error: Could not read audit JSON from {args.input}: {exc}", file=sys.stderr)
        sys.exit(1)

    if not isinstance(audit_data, dict):
        print(f"Error: Expected a JSON object at the top level of {args.input}", file=sys.stderr)
        sys.exit(1)

    html = generate_html_report(audit_data)

    try:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(html)
    except OSError as exc:
        print(f"Error: Could not write report to {args.output}: {exc}", file=sys.stderr)
        sys.exit(1)

    # "or" fallbacks keep the summary from crashing on explicit JSON nulls.
    score_block = audit_data.get("score") or {}
    total = score_block.get("total", 0) if isinstance(score_block, dict) else 0
    issues = audit_data.get("issues") or []

    print(f"✅ Report generated: {args.output}", file=sys.stderr)
    print(f" Domain: {audit_data.get('domain', 'Unknown')}")
    print(f" Score: {total}/100")
    print(f" Issues: {len(issues)}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,62 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>SEO Test Page — BMAD+ Fixture</title>
6
+ <meta name="description" content="A test page for validating the SEO parse module with known elements.">
7
+ <meta name="robots" content="index, follow">
8
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
9
+ <meta property="og:title" content="SEO Test Page">
10
+ <meta property="og:type" content="website">
11
+ <meta property="og:url" content="https://example.com/test">
12
+ <meta name="twitter:card" content="summary_large_image">
13
+ <meta name="twitter:title" content="SEO Test Page">
14
+ <link rel="canonical" href="https://example.com/test">
15
+ <link rel="alternate" hreflang="en" href="https://example.com/en/test">
16
+ <link rel="alternate" hreflang="fr" href="https://example.com/fr/test">
17
+ <link rel="alternate" hreflang="x-default" href="https://example.com/test">
18
+ </head>
19
+ <body>
20
+ <h1>Main Heading of the Page</h1>
21
+ <p>This is a test paragraph with enough words to verify word count functionality in the parser module. We need at least a few sentences to make the test meaningful and realistic.</p>
22
+
23
+ <h2>Second Level Heading One</h2>
24
+ <p>Content under the first H2. This paragraph adds more text to increase the word count.</p>
25
+
26
+ <h2>Second Level Heading Two</h2>
27
+ <p>Another section with different content about SEO analysis and testing.</p>
28
+
29
+ <h3>Third Level Heading</h3>
30
+ <p>Detailed information under the H3 heading for testing hierarchy detection.</p>
31
+
32
+ <img src="/images/hero.jpg" alt="Hero image for testing" width="800" height="400" loading="lazy">
33
+ <img src="/images/no-alt.jpg" width="200" height="200">
34
+ <img src="/images/empty-alt.jpg" alt="" width="100" height="100">
35
+
36
+ <a href="https://example.com/about">About Us</a>
37
+ <a href="https://example.com/services">Our Services</a>
38
+ <a href="https://external.com/partner" rel="nofollow" target="_blank">Partner Link</a>
39
+ <a href="/relative-link">Relative Link</a>
40
+
41
+ <script type="application/ld+json">
42
+ {
43
+ "@context": "https://schema.org",
44
+ "@type": "Organization",
45
+ "name": "Test Company",
46
+ "url": "https://example.com",
47
+ "logo": "https://example.com/logo.png"
48
+ }
49
+ </script>
50
+
51
+ <script type="application/ld+json">
52
+ {
53
+ "@context": "https://schema.org",
54
+ "@type": "BreadcrumbList",
55
+ "itemListElement": [
56
+ {"@type": "ListItem", "position": 1, "name": "Home", "item": "https://example.com"},
57
+ {"@type": "ListItem", "position": 2, "name": "Test", "item": "https://example.com/test"}
58
+ ]
59
+ }
60
+ </script>
61
+ </body>
62
+ </html>
@@ -0,0 +1,75 @@
1
+ """
2
+ Tests for seo_apis.py — API response parsing and error handling.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
10
+
11
+ # Temporarily unset API key for error tests
12
+ original_key = os.environ.get("GOOGLE_API_KEY", "")
13
+
14
+
15
class TestAPIKeyMissing:
    """Check the API runners report an error when GOOGLE_API_KEY is absent."""

    def setup_method(self):
        # Drop the variable first, then reload so the module is re-evaluated
        # without it.
        os.environ.pop("GOOGLE_API_KEY", None)
        import importlib
        import seo_apis

        self.seo_apis = importlib.reload(seo_apis)

    def teardown_method(self):
        # Restore the key captured at module import; nothing to do if it was empty.
        if not original_key:
            return
        os.environ["GOOGLE_API_KEY"] = original_key

    def _run_without_key(self, runner_name):
        """Blank the module-level key, then call the named runner."""
        self.seo_apis.API_KEY = ""
        runner = getattr(self.seo_apis, runner_name)
        return runner("https://example.com")

    def test_pagespeed_without_key(self):
        outcome = self._run_without_key("run_pagespeed")
        assert outcome.get("error") is not None
        assert "GOOGLE_API_KEY" in outcome["error"]

    def test_crux_without_key(self):
        outcome = self._run_without_key("run_crux")
        assert outcome.get("error") is not None

    def test_rich_results_without_key(self):
        outcome = self._run_without_key("run_rich_results_test")
        assert outcome.get("error") is not None
46
+
47
+
48
class TestResultStructure:
    """Check the shape of the dictionaries returned by the API runners."""

    def setup_method(self):
        import importlib
        import seo_apis

        # Start every test from a freshly reloaded module.
        self.seo_apis = importlib.reload(seo_apis)

    def _run_without_key(self, runner_name):
        """Blank the module-level key, then call the named runner."""
        self.seo_apis.API_KEY = ""
        runner = getattr(self.seo_apis, runner_name)
        return runner("https://example.com")

    def test_pagespeed_result_keys(self):
        # Even on error, the result should expose an "error" entry.
        payload = self._run_without_key("run_pagespeed")
        assert "error" in payload

    def test_crux_result_keys(self):
        payload = self._run_without_key("run_crux")
        assert "error" in payload

    def test_run_all_structure(self):
        payload = self._run_without_key("run_all")
        for expected_key in ("pagespeed", "crux", "mobile_friendly", "url", "timestamp"):
            assert expected_key in payload
@@ -0,0 +1,121 @@
1
+ """
2
+ Tests for seo_crawl.py — URL normalization, link extraction, depth limiting.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
10
+
11
+ from seo_crawl import SEOCrawler
12
+
13
+
14
class TestURLNormalization:
    """URL normalization as used for de-duplicating crawled pages."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_strips_trailing_slash(self):
        normalized = self.crawler.normalize_url("https://example.com/page/")
        assert normalized == "https://example.com/page"

    def test_preserves_root(self):
        normalized = self.crawler.normalize_url("https://example.com/")
        assert normalized == "https://example.com/"

    def test_normalizes_scheme(self):
        normalized = self.crawler.normalize_url("https://example.com/page")
        assert normalized.startswith("https://")

    def test_deduplicates(self):
        # Both spellings of the same page must collapse to one value.
        with_slash = self.crawler.normalize_url("https://example.com/page/")
        without_slash = self.crawler.normalize_url("https://example.com/page")
        assert with_slash == without_slash
34
+
35
+
36
class TestInternalDetection:
    """Classification of URLs as internal or external to the crawled site."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_internal_url(self):
        verdict = self.crawler.is_internal("https://example.com/about")
        assert verdict is True

    def test_external_url(self):
        verdict = self.crawler.is_internal("https://other.com/page")
        assert verdict is False

    def test_subdomain_is_external(self):
        # A subdomain of the crawled host is expected to count as external.
        verdict = self.crawler.is_internal("https://blog.example.com/post")
        assert verdict is False
48
+
49
+
50
class TestLinkExtraction:
    """Link extraction: which hrefs are kept and which are discarded."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def _links_in(self, markup):
        """Extract links from ``markup`` as if served from the site root."""
        return self.crawler.extract_links(markup, "https://example.com/")

    def test_extracts_internal_links(self):
        markup = '''
        <a href="/about">About</a>
        <a href="https://example.com/contact">Contact</a>
        '''
        found = self._links_in(markup)
        assert len(found) == 2

    def test_ignores_external_links(self):
        found = self._links_in('<a href="https://other.com/page">External</a>')
        assert len(found) == 0

    def test_ignores_anchors(self):
        found = self._links_in('<a href="#section">Anchor</a>')
        assert len(found) == 0

    def test_ignores_javascript(self):
        found = self._links_in('<a href="javascript:void(0)">JS Link</a>')
        assert len(found) == 0

    def test_ignores_mailto(self):
        found = self._links_in('<a href="mailto:test@example.com">Email</a>')
        assert len(found) == 0
81
+
82
+
83
class TestTitleExtraction:
    """Extraction of the <title> text from a page."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_extracts_title(self):
        page = "<html><head><title>Test Page</title></head><body></body></html>"
        title = self.crawler.extract_title(page)
        assert title == "Test Page"

    def test_missing_title(self):
        page = "<html><body></body></html>"
        title = self.crawler.extract_title(page)
        assert title is None
94
+
95
+
96
class TestWordCount:
    """Word counting over the visible text of a page."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_counts_visible_words(self):
        page = "<html><body><p>This is a test with seven words.</p></body></html>"
        total = self.crawler.count_words(page)
        assert total == 7

    def test_excludes_script_content(self):
        page = '<html><body><p>Visible</p><script>var hidden = true;</script></body></html>'
        total = self.crawler.count_words(page)
        # Only "Visible" should be counted; the script body must be ignored.
        assert total == 1
108
+
109
+
110
class TestCrawlerConfig:
    """Constructor arguments are stored on the crawler instance."""

    def test_max_pages_respected(self):
        limited = SEOCrawler("https://example.com", max_pages=5)
        assert limited.max_pages == 5

    def test_max_depth_respected(self):
        shallow = SEOCrawler("https://example.com", max_depth=1)
        assert shallow.max_depth == 1

    def test_base_domain_extracted(self):
        # The full host, including "www.", is expected as the base domain.
        with_www = SEOCrawler("https://www.example.com/page")
        assert with_www.base_domain == "www.example.com"