bmad-plus 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,18 @@ All notable changes to BMAD+ will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.3.3] — 2026-03-19
9
+
10
+ ### 🧪 SEO Engine — Quality & Security (Sprint 3)
11
+
12
+ ### Added
13
+ - **Unit tests** — 50 pytest tests covering all Python scripts (fetch, parse, crawl, APIs)
14
+ - **Pre-commit hook** — `hooks/seo-check.sh` validates HTML for title, meta, alt, H1 before commit
15
+ - **Audit JSON schema** — `ref/audit-schema.json` standardized export format for dashboard/API integration
16
+ - **Test fixture** — `tests/fixtures/sample_page.html` with known SEO elements
17
+
18
+ ---
19
+
8
20
  ## [0.3.2] — 2026-03-19
9
21
 
10
22
  ### 📊 SEO Engine — Reports, Competitor & Hreflang (Sprint 2)
@@ -0,0 +1,95 @@
1
#!/bin/bash
# SEO Pre-Commit Hook — Catches common SEO issues before commit.
# Install: cp hooks/seo-check.sh .git/hooks/pre-commit && chmod +x .git/hooks/pre-commit
#
# Inspects the staged (index) version of every added/copied/modified file
# whose name ends in .html, .htm, .php, .jsx or .tsx and reports:
#   - errors   (commit blocked): missing <title>, empty <title>
#   - warnings (commit allowed): missing meta description, <img> without alt,
#     multiple <h1>, no <h1>, generic anchor text
# Exit status: 1 when at least one error was found, 0 otherwise.
#
# NOTE(review): the document-level checks also run on partial templates and
# JSX/TSX components, which rarely contain <title>; confirm that is intended.
#
# Author: Laurent Rochetta | BMAD+ SEO Engine

ERRORS=0
WARNINGS=0

# Succeeds when path $1 has one of the extensions this hook inspects.
is_html_like() {
  printf '%s\n' "$1" | grep -qiE '\.(html|htm|php|jsx|tsx)$'
}

# Succeeds when path $1 is third-party or generated output. Directory names
# are matched as whole path components so that e.g. "rebuild.html" is still
# checked.
is_excluded() {
  printf '%s\n' "$1" | grep -qE '(^|/)(node_modules|vendor|dist|build)/|\.min\.'
}

# Prints the number of case-insensitive occurrences of ERE $1 in text $2.
# Counting `grep -o` output lines always yields exactly one integer (0 when
# nothing matches) and counts several hits on the same line separately.
count_matches() {
  grep -oiE -- "$1" <<<"$2" | wc -l | tr -d '[:space:]'
}

# Runs every SEO check on the staged content $2 of file $1, printing one line
# per finding and updating the global ERRORS / WARNINGS counters.
check_file() {
  local file=$1
  local flat missing_alt h1_count bad_anchors

  # Join all lines so that tags broken across lines are seen as one unit.
  flat=$(printf '%s' "$2" | tr '\r\n' '  ')

  # Check 1: Missing <title> tag
  if ! grep -qi '<title' <<<"$flat"; then
    echo " 🔴 $file — Missing <title> tag"
    ERRORS=$((ERRORS + 1))
  fi

  # Check 2: Empty <title> tag
  if grep -qiE '<title[^>]*>[[:space:]]*</title>' <<<"$flat"; then
    echo " 🔴 $file — Empty <title> tag"
    ERRORS=$((ERRORS + 1))
  fi

  # Check 3: Missing meta description (either quote style is valid HTML)
  if ! grep -qiE "name=[\"']description[\"']" <<<"$flat"; then
    echo " 🟠 $file — Missing <meta name=\"description\">"
    WARNINGS=$((WARNINGS + 1))
  fi

  # Check 4: Images without alt attribute.
  # Each <img ...> tag is extracted on its own line, then the tags that carry
  # no alt attribute are counted. `grep -c` prints 0 itself when nothing is
  # left, so no fallback value is needed.
  missing_alt=$(grep -oiE '<img[^>]*>' <<<"$flat" \
    | grep -vciE '[[:space:]]alt[[:space:]]*=')

  if ((missing_alt > 0)); then
    echo " 🟠 $file — $missing_alt image(s) without alt attribute"
    WARNINGS=$((WARNINGS + 1))
  fi

  # Check 5: Multiple H1 tags
  h1_count=$(count_matches '<h1[[:space:]>]' "$flat")
  if ((h1_count > 1)); then
    echo " 🟡 $file — Multiple H1 tags ($h1_count found, should be 1)"
    WARNINGS=$((WARNINGS + 1))
  fi

  # Check 6: No H1 tag at all
  if ((h1_count == 0)); then
    echo " 🟠 $file — No H1 tag found"
    WARNINGS=$((WARNINGS + 1))
  fi

  # Check 7: "Click here" or "Learn more" anchor text
  bad_anchors=$(count_matches '>(click here|learn more|read more|here)<' "$flat")
  if ((bad_anchors > 0)); then
    echo " 🟡 $file — $bad_anchors link(s) with generic anchor text (\"click here\", \"learn more\")"
    WARNINGS=$((WARNINGS + 1))
  fi
}

main() {
  local file content staged=0

  echo "🔎 BMAD+ SEO Pre-Commit Check..."

  # Only check staged HTML files. -z yields NUL-terminated, unquoted paths so
  # names containing spaces or non-ASCII characters survive intact; reading
  # from process substitution keeps the counters in the current shell.
  while IFS= read -r -d '' file; do
    is_html_like "$file" || continue
    staged=$((staged + 1))

    # Skip node_modules, vendor and build output
    if is_excluded "$file"; then
      continue
    fi

    # Read the staged blob, not the working-tree file
    content=$(git show ":$file" 2>/dev/null)
    if [[ -z "$content" ]]; then
      continue
    fi

    check_file "$file" "$content"
  done < <(git diff --cached --name-only --diff-filter=ACM -z)

  if ((staged == 0)); then
    echo " No HTML files staged, skipping SEO check."
    exit 0
  fi

  echo ""
  echo " Results: $ERRORS error(s), $WARNINGS warning(s)"

  if ((ERRORS > 0)); then
    echo " ❌ Commit blocked — fix critical SEO issues first!"
    exit 1
  fi

  if ((WARNINGS > 0)); then
    echo " ⚠️ Commit allowed with warnings — consider fixing these issues."
  else
    echo " ✅ All SEO checks passed!"
  fi
  exit 0
}

main "$@"
@@ -0,0 +1,187 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "BMAD+ SEO Audit Result",
4
+ "description": "Standardized JSON format for SEO audit results. Compatible with dashboards, MCP Server, and external tools.",
5
+ "version": "1.0.0",
6
+ "author": "Laurent Rochetta",
7
+ "type": "object",
8
+ "required": ["engine", "version", "domain", "timestamp", "score", "issues"],
9
+ "properties": {
10
+ "engine": {
11
+ "type": "string",
12
+ "const": "bmad-seo-engine",
13
+ "description": "Engine identifier"
14
+ },
15
+ "version": {
16
+ "type": "string",
17
+ "description": "Engine version",
18
+ "examples": ["2.1.0"]
19
+ },
20
+ "domain": {
21
+ "type": "string",
22
+ "description": "Audited domain (without protocol)",
23
+ "examples": ["example.com"]
24
+ },
25
+ "url": {
26
+ "type": "string",
27
+ "format": "uri",
28
+ "description": "Full URL audited"
29
+ },
30
+ "timestamp": {
31
+ "type": "string",
32
+ "format": "date-time",
33
+ "description": "ISO 8601 timestamp of audit completion"
34
+ },
35
+ "business_type": {
36
+ "type": "string",
37
+ "enum": ["saas", "ecommerce", "local", "publisher", "agency", "other"],
38
+ "description": "Detected business type"
39
+ },
40
+ "pages_analyzed": {
41
+ "type": "integer",
42
+ "minimum": 1,
43
+ "description": "Number of pages analyzed"
44
+ },
45
+ "score": {
46
+ "type": "object",
47
+ "required": ["total", "categories"],
48
+ "properties": {
49
+ "total": {
50
+ "type": "integer",
51
+ "minimum": 0,
52
+ "maximum": 100,
53
+ "description": "Weighted SEO Health Score"
54
+ },
55
+ "rating": {
56
+ "type": "string",
57
+ "enum": ["excellent", "good", "needs_work", "poor", "critical"],
58
+ "description": "Score interpretation"
59
+ },
60
+ "categories": {
61
+ "type": "object",
62
+ "properties": {
63
+ "technical": { "type": "integer", "minimum": 0, "maximum": 100 },
64
+ "content_eeat": { "type": "integer", "minimum": 0, "maximum": 100 },
65
+ "on_page": { "type": "integer", "minimum": 0, "maximum": 100 },
66
+ "schema": { "type": "integer", "minimum": 0, "maximum": 100 },
67
+ "performance": { "type": "integer", "minimum": 0, "maximum": 100 },
68
+ "ai_readiness": { "type": "integer", "minimum": 0, "maximum": 100 },
69
+ "images": { "type": "integer", "minimum": 0, "maximum": 100 }
70
+ },
71
+ "description": "Score per category (0-100)"
72
+ }
73
+ }
74
+ },
75
+ "issues": {
76
+ "type": "array",
77
+ "items": {
78
+ "type": "object",
79
+ "required": ["id", "severity", "category", "title"],
80
+ "properties": {
81
+ "id": {
82
+ "type": "string",
83
+ "description": "Unique issue identifier",
84
+ "examples": ["missing-meta-description", "multiple-h1"]
85
+ },
86
+ "severity": {
87
+ "type": "string",
88
+ "enum": ["critical", "high", "medium", "low"]
89
+ },
90
+ "category": {
91
+ "type": "string",
92
+ "enum": ["technical", "content", "on_page", "schema", "performance", "geo", "images"]
93
+ },
94
+ "title": {
95
+ "type": "string",
96
+ "description": "Human-readable issue title"
97
+ },
98
+ "description": {
99
+ "type": "string",
100
+ "description": "Detailed explanation"
101
+ },
102
+ "affected_urls": {
103
+ "type": "array",
104
+ "items": { "type": "string", "format": "uri" },
105
+ "description": "Pages affected by this issue"
106
+ },
107
+ "fix": {
108
+ "type": "string",
109
+ "description": "Auto-generated fix code (HTML, JSON-LD, etc.)"
110
+ },
111
+ "quick_win": {
112
+ "type": "boolean",
113
+ "description": "True if high impact / low effort"
114
+ },
115
+ "impact": {
116
+ "type": "string",
117
+ "enum": ["high", "medium", "low"]
118
+ },
119
+ "effort": {
120
+ "type": "string",
121
+ "enum": ["high", "medium", "low"]
122
+ }
123
+ }
124
+ }
125
+ },
126
+ "pages": {
127
+ "type": "array",
128
+ "items": {
129
+ "type": "object",
130
+ "properties": {
131
+ "url": { "type": "string", "format": "uri" },
132
+ "status": { "type": "integer" },
133
+ "title": { "type": "string" },
134
+ "word_count": { "type": "integer" },
135
+ "schema_types": { "type": "array", "items": { "type": "string" } },
136
+ "eeat_score": { "type": "integer", "minimum": 0, "maximum": 100 }
137
+ }
138
+ },
139
+ "description": "Per-page analysis data"
140
+ },
141
+ "geo": {
142
+ "type": "object",
143
+ "properties": {
144
+ "ai_readiness_score": { "type": "integer", "minimum": 0, "maximum": 100 },
145
+ "ai_crawlers_allowed": { "type": "array", "items": { "type": "string" } },
146
+ "has_llms_txt": { "type": "boolean" },
147
+ "citability_score": { "type": "integer", "minimum": 0, "maximum": 100 }
148
+ },
149
+ "description": "GEO / AI search readiness metrics"
150
+ },
151
+ "pagespeed": {
152
+ "type": "object",
153
+ "properties": {
154
+ "mobile": {
155
+ "type": "object",
156
+ "properties": {
157
+ "performance": { "type": "integer" },
158
+ "accessibility": { "type": "integer" },
159
+ "best_practices": { "type": "integer" },
160
+ "seo": { "type": "integer" }
161
+ }
162
+ },
163
+ "desktop": {
164
+ "type": "object",
165
+ "properties": {
166
+ "performance": { "type": "integer" },
167
+ "accessibility": { "type": "integer" },
168
+ "best_practices": { "type": "integer" },
169
+ "seo": { "type": "integer" }
170
+ }
171
+ }
172
+ },
173
+ "description": "PageSpeed Insights scores"
174
+ },
175
+ "monitoring": {
176
+ "type": "object",
177
+ "properties": {
178
+ "previous_score": { "type": "integer" },
179
+ "previous_date": { "type": "string", "format": "date" },
180
+ "delta": { "type": "integer" },
181
+ "issues_resolved": { "type": "integer" },
182
+ "issues_new": { "type": "integer" }
183
+ },
184
+ "description": "Comparison with previous audit (if available)"
185
+ }
186
+ }
187
+ }
@@ -0,0 +1,62 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>SEO Test Page โ€” BMAD+ Fixture</title>
6
+ <meta name="description" content="A test page for validating the SEO parse module with known elements.">
7
+ <meta name="robots" content="index, follow">
8
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
9
+ <meta property="og:title" content="SEO Test Page">
10
+ <meta property="og:type" content="website">
11
+ <meta property="og:url" content="https://example.com/test">
12
+ <meta name="twitter:card" content="summary_large_image">
13
+ <meta name="twitter:title" content="SEO Test Page">
14
+ <link rel="canonical" href="https://example.com/test">
15
+ <link rel="alternate" hreflang="en" href="https://example.com/en/test">
16
+ <link rel="alternate" hreflang="fr" href="https://example.com/fr/test">
17
+ <link rel="alternate" hreflang="x-default" href="https://example.com/test">
18
+ </head>
19
+ <body>
20
+ <h1>Main Heading of the Page</h1>
21
+ <p>This is a test paragraph with enough words to verify word count functionality in the parser module. We need at least a few sentences to make the test meaningful and realistic.</p>
22
+
23
+ <h2>Second Level Heading One</h2>
24
+ <p>Content under the first H2. This paragraph adds more text to increase the word count.</p>
25
+
26
+ <h2>Second Level Heading Two</h2>
27
+ <p>Another section with different content about SEO analysis and testing.</p>
28
+
29
+ <h3>Third Level Heading</h3>
30
+ <p>Detailed information under the H3 heading for testing hierarchy detection.</p>
31
+
32
+ <img src="/images/hero.jpg" alt="Hero image for testing" width="800" height="400" loading="lazy">
33
+ <img src="/images/no-alt.jpg" width="200" height="200">
34
+ <img src="/images/empty-alt.jpg" alt="" width="100" height="100">
35
+
36
+ <a href="https://example.com/about">About Us</a>
37
+ <a href="https://example.com/services">Our Services</a>
38
+ <a href="https://external.com/partner" rel="nofollow" target="_blank">Partner Link</a>
39
+ <a href="/relative-link">Relative Link</a>
40
+
41
+ <script type="application/ld+json">
42
+ {
43
+ "@context": "https://schema.org",
44
+ "@type": "Organization",
45
+ "name": "Test Company",
46
+ "url": "https://example.com",
47
+ "logo": "https://example.com/logo.png"
48
+ }
49
+ </script>
50
+
51
+ <script type="application/ld+json">
52
+ {
53
+ "@context": "https://schema.org",
54
+ "@type": "BreadcrumbList",
55
+ "itemListElement": [
56
+ {"@type": "ListItem", "position": 1, "name": "Home", "item": "https://example.com"},
57
+ {"@type": "ListItem", "position": 2, "name": "Test", "item": "https://example.com/test"}
58
+ ]
59
+ }
60
+ </script>
61
+ </body>
62
+ </html>
@@ -0,0 +1,75 @@
1
+ """
2
+ Tests for seo_apis.py — API response parsing and error handling.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
10
+
11
+ # Temporarily unset API key for error tests
12
+ original_key = os.environ.get("GOOGLE_API_KEY", "")
13
+
14
+
15
class TestAPIKeyMissing:
    """Error reporting of the seo_apis entry points when no API key is configured."""

    def setup_method(self):
        # Drop the key from the environment first, then reload the module so
        # it is re-executed without the key.
        if "GOOGLE_API_KEY" in os.environ:
            del os.environ["GOOGLE_API_KEY"]
        from importlib import reload
        import seo_apis
        reload(seo_apis)
        self.seo_apis = seo_apis

    def teardown_method(self):
        # Put back the key captured at import time; nothing to do if it was empty.
        if not original_key:
            return
        os.environ["GOOGLE_API_KEY"] = original_key

    def test_pagespeed_without_key(self):
        """run_pagespeed reports an error that names GOOGLE_API_KEY."""
        api = self.seo_apis
        # Force the module to use an empty key
        api.API_KEY = ""
        outcome = api.run_pagespeed("https://example.com")
        assert outcome.get("error") is not None
        assert "GOOGLE_API_KEY" in outcome["error"]

    def test_crux_without_key(self):
        """run_crux reports an error."""
        api = self.seo_apis
        api.API_KEY = ""
        outcome = api.run_crux("https://example.com")
        assert outcome.get("error") is not None

    def test_rich_results_without_key(self):
        """run_rich_results_test reports an error."""
        api = self.seo_apis
        api.API_KEY = ""
        outcome = api.run_rich_results_test("https://example.com")
        assert outcome.get("error") is not None
46
+
47
+
48
class TestResultStructure:
    """Shape of the dictionaries returned by the seo_apis functions."""

    def setup_method(self):
        # Start every test from a freshly re-executed module.
        from importlib import reload
        import seo_apis
        reload(seo_apis)
        self.seo_apis = seo_apis

    def test_pagespeed_result_keys(self):
        """run_pagespeed's result carries an "error" key when the key is blank."""
        api = self.seo_apis
        api.API_KEY = ""
        payload = api.run_pagespeed("https://example.com")
        # Even on error, should have expected structure
        assert "error" in payload

    def test_crux_result_keys(self):
        """run_crux's result carries an "error" key when the key is blank."""
        api = self.seo_apis
        api.API_KEY = ""
        payload = api.run_crux("https://example.com")
        assert "error" in payload

    def test_run_all_structure(self):
        """run_all's result exposes every expected top-level key."""
        api = self.seo_apis
        api.API_KEY = ""
        payload = api.run_all("https://example.com")
        assert "pagespeed" in payload
        assert "crux" in payload
        assert "mobile_friendly" in payload
        assert "url" in payload
        assert "timestamp" in payload
@@ -0,0 +1,121 @@
1
+ """
2
+ Tests for seo_crawl.py — URL normalization, link extraction, depth limiting.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
10
+
11
+ from seo_crawl import SEOCrawler
12
+
13
+
14
class TestURLNormalization:
    """normalize_url must map equivalent URLs to one canonical string."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_strips_trailing_slash(self):
        """A trailing slash on a non-root path is removed."""
        normalized = self.crawler.normalize_url("https://example.com/page/")
        assert normalized == "https://example.com/page"

    def test_preserves_root(self):
        """The root URL keeps its single slash."""
        normalized = self.crawler.normalize_url("https://example.com/")
        assert normalized == "https://example.com/"

    def test_normalizes_scheme(self):
        """An https URL still starts with https:// after normalization."""
        normalized = self.crawler.normalize_url("https://example.com/page")
        assert normalized.startswith("https://")

    def test_deduplicates(self):
        """With and without trailing slash normalize to the same value."""
        with_slash = self.crawler.normalize_url("https://example.com/page/")
        without_slash = self.crawler.normalize_url("https://example.com/page")
        assert with_slash == without_slash
34
+
35
+
36
class TestInternalDetection:
    """is_internal classification relative to https://example.com."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_internal_url(self):
        """A URL on the crawled host is internal."""
        verdict = self.crawler.is_internal("https://example.com/about")
        assert verdict is True

    def test_external_url(self):
        """A URL on another domain is not internal."""
        verdict = self.crawler.is_internal("https://other.com/page")
        assert verdict is False

    def test_subdomain_is_external(self):
        """A subdomain of the crawled host is not internal."""
        verdict = self.crawler.is_internal("https://blog.example.com/post")
        assert verdict is False
48
+
49
+
50
class TestLinkExtraction:
    """extract_links keeps internal page links and drops everything else."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_extracts_internal_links(self):
        """A relative and an absolute same-host link are both returned."""
        html = '''
        <a href="/about">About</a>
        <a href="https://example.com/contact">Contact</a>
        '''
        found = self.crawler.extract_links(html, "https://example.com/")
        assert len(found) == 2

    def test_ignores_external_links(self):
        """Links to another domain are not returned."""
        html = '<a href="https://other.com/page">External</a>'
        found = self.crawler.extract_links(html, "https://example.com/")
        assert len(found) == 0

    def test_ignores_anchors(self):
        """Fragment-only hrefs are not returned."""
        html = '<a href="#section">Anchor</a>'
        found = self.crawler.extract_links(html, "https://example.com/")
        assert len(found) == 0

    def test_ignores_javascript(self):
        """javascript: hrefs are not returned."""
        html = '<a href="javascript:void(0)">JS Link</a>'
        found = self.crawler.extract_links(html, "https://example.com/")
        assert len(found) == 0

    def test_ignores_mailto(self):
        """mailto: hrefs are not returned."""
        html = '<a href="mailto:test@example.com">Email</a>'
        found = self.crawler.extract_links(html, "https://example.com/")
        assert len(found) == 0
81
+
82
+
83
class TestTitleExtraction:
    """extract_title on documents with and without a <title> element."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_extracts_title(self):
        """The text inside <title> is returned."""
        document = "<html><head><title>Test Page</title></head><body></body></html>"
        title = self.crawler.extract_title(document)
        assert title == "Test Page"

    def test_missing_title(self):
        """None is returned when the document has no <title>."""
        document = "<html><body></body></html>"
        title = self.crawler.extract_title(document)
        assert title is None
94
+
95
+
96
class TestWordCount:
    """count_words counts visible text only."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_counts_visible_words(self):
        """A seven-word paragraph yields a count of 7."""
        document = "<html><body><p>This is a test with seven words.</p></body></html>"
        total = self.crawler.count_words(document)
        assert total == 7

    def test_excludes_script_content(self):
        """Text inside <script> does not contribute to the count."""
        document = '<html><body><p>Visible</p><script>var hidden = true;</script></body></html>'
        total = self.crawler.count_words(document)
        assert total == 1  # Only "Visible"
108
+
109
+
110
class TestCrawlerConfig:
    """Constructor arguments are exposed as attributes on the crawler."""

    def test_max_pages_respected(self):
        """max_pages passed to the constructor is stored unchanged."""
        instance = SEOCrawler("https://example.com", max_pages=5)
        assert instance.max_pages == 5

    def test_max_depth_respected(self):
        """max_depth passed to the constructor is stored unchanged."""
        instance = SEOCrawler("https://example.com", max_depth=1)
        assert instance.max_depth == 1

    def test_base_domain_extracted(self):
        """base_domain is the host of the start URL, including "www."."""
        instance = SEOCrawler("https://www.example.com/page")
        assert instance.base_domain == "www.example.com"
@@ -0,0 +1,70 @@
1
+ """
2
+ Tests for seo_fetch.py — SSRF protection, URL handling, error cases.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
10
+
11
+ from seo_fetch import is_safe_url, fetch_page
12
+
13
+
14
class TestSSRFProtection:
    """is_safe_url must reject private, loopback and reserved targets."""

    def test_blocks_localhost(self):
        """Loopback address 127.0.0.1 is rejected."""
        verdict = is_safe_url("http://127.0.0.1/admin")
        assert verdict is False

    def test_blocks_private_10(self):
        """A 10.x.x.x address is rejected."""
        verdict = is_safe_url("http://10.0.0.1/secret")
        assert verdict is False

    def test_blocks_private_192(self):
        """A 192.168.x.x address is rejected."""
        verdict = is_safe_url("http://192.168.1.1/")
        assert verdict is False

    def test_blocks_private_172(self):
        """A 172.16.x.x address is rejected."""
        verdict = is_safe_url("http://172.16.0.1/")
        assert verdict is False

    def test_allows_public_ip(self):
        """A public IP literal is accepted."""
        verdict = is_safe_url("https://93.184.216.34/")
        assert verdict is True

    def test_allows_public_domain(self):
        """A public domain name is accepted."""
        verdict = is_safe_url("https://example.com/")
        assert verdict is True

    def test_blocks_empty_hostname(self):
        """A URL without a hostname is rejected."""
        verdict = is_safe_url("http:///nohost")
        assert verdict is False

    def test_blocks_zero_ip(self):
        """The 0.0.0.0 address is rejected."""
        verdict = is_safe_url("http://0.0.0.0/")
        assert verdict is False
40
+
41
+
42
class TestFetchPage:
    """Behaviour of fetch_page for valid, invalid and blocked URLs."""

    def test_normalizes_url_without_scheme(self):
        """The "url" field echoes the input exactly as it was given."""
        response = fetch_page("example.com", timeout=5)
        assert response["url"] == "example.com"
        # Should have attempted https://example.com

    def test_blocks_invalid_scheme(self):
        """An ftp:// URL yields an "Invalid URL scheme" error."""
        response = fetch_page("ftp://example.com/file")
        assert response["error"] is not None
        assert "Invalid URL scheme" in response["error"]

    def test_blocks_ssrf(self):
        """A loopback URL yields a "Blocked" error."""
        response = fetch_page("http://127.0.0.1/admin")
        assert response["error"] is not None
        assert "Blocked" in response["error"]

    def test_result_structure(self):
        """Verify the result dict has all expected keys."""
        response = fetch_page("https://example.com", timeout=5)
        wanted = {
            "url",
            "final_url",
            "status_code",
            "content",
            "headers",
            "redirect_chain",
            "content_length",
            "response_time_ms",
            "error",
        }
        assert wanted == set(response.keys())

    def test_timeout_returns_error(self):
        """A request that cannot complete reports an error."""
        # Use a non-routable IP to force timeout
        response = fetch_page("http://192.0.2.1/", timeout=1)
        assert response["error"] is not None
@@ -0,0 +1,184 @@
1
+ """
2
+ Tests for seo_parse.py — HTML parsing and SEO element extraction.
3
+
4
+ Author: Laurent Rochetta
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+
11
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
12
+
13
+ from seo_parse import parse_html
14
+
15
+ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
16
+
17
+
18
def load_fixture(name: str) -> str:
    """Return the text of fixture file ``name`` from FIXTURES_DIR, read as UTF-8."""
    fixture_path = os.path.join(FIXTURES_DIR, name)
    with open(fixture_path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
21
+
22
+
23
class TestTitleParsing:
    """The "title" and "title_length" fields produced by parse_html."""

    def test_extracts_title(self):
        """The fixture's title text is returned verbatim."""
        parsed = parse_html(load_fixture("sample_page.html"))
        expected = "SEO Test Page โ€” BMAD+ Fixture"
        assert parsed["title"] == expected

    def test_title_length(self):
        """title_length equals the character length of the title."""
        parsed = parse_html(load_fixture("sample_page.html"))
        expected = len("SEO Test Page โ€” BMAD+ Fixture")
        assert parsed["title_length"] == expected

    def test_missing_title(self):
        """Without a <title>, title is None and title_length is 0."""
        parsed = parse_html("<html><body><p>No title</p></body></html>")
        assert parsed["title"] is None
        assert parsed["title_length"] == 0
36
+
37
+
38
class TestMetaTags:
    """Extraction of the description, robots and viewport meta tags."""

    def test_extracts_description(self):
        """The fixture's description mentions "test page" (case-insensitive)."""
        parsed = parse_html(load_fixture("sample_page.html"))
        description = parsed["meta_description"].lower()
        assert "test page" in description

    def test_extracts_robots(self):
        """The robots directive is returned as written in the fixture."""
        parsed = parse_html(load_fixture("sample_page.html"))
        assert parsed["meta_robots"] == "index, follow"

    def test_extracts_viewport(self):
        """The viewport content includes width=device-width."""
        parsed = parse_html(load_fixture("sample_page.html"))
        viewport = parsed["meta_viewport"]
        assert "width=device-width" in viewport

    def test_missing_description(self):
        """meta_description is None when the tag is absent."""
        parsed = parse_html("<html><head><title>T</title></head><body></body></html>")
        assert parsed["meta_description"] is None
54
+
55
+
56
class TestCanonical:
    """The "canonical" field produced by parse_html."""

    def test_extracts_canonical(self):
        """The fixture's canonical href is returned."""
        parsed = parse_html(load_fixture("sample_page.html"))
        assert parsed["canonical"] == "https://example.com/test"

    def test_missing_canonical(self):
        """canonical is None when no canonical link is present."""
        parsed = parse_html("<html><body></body></html>")
        assert parsed["canonical"] is None
64
+
65
+
66
class TestHeadings:
    """Heading counts per level in parse_html's "headings" mapping."""

    def test_h1_count(self):
        """The fixture contains exactly one h1."""
        headings = parse_html(load_fixture("sample_page.html"))["headings"]
        assert len(headings["h1"]) == 1

    def test_h2_count(self):
        """The fixture contains exactly two h2 headings."""
        headings = parse_html(load_fixture("sample_page.html"))["headings"]
        assert len(headings["h2"]) == 2

    def test_h3_count(self):
        """The fixture contains exactly one h3."""
        headings = parse_html(load_fixture("sample_page.html"))["headings"]
        assert len(headings["h3"]) == 1

    def test_multiple_h1_detection(self):
        """Two h1 elements in a document are both reported."""
        document = "<html><body><h1>First</h1><h1>Second</h1></body></html>"
        headings = parse_html(document)["headings"]
        assert len(headings["h1"]) == 2
83
+
84
+
85
class TestImages:
    """Per-image records in parse_html's "images" list."""

    def test_image_count(self):
        """All three fixture images are reported."""
        images = parse_html(load_fixture("sample_page.html"))["images"]
        assert len(images) == 3

    def test_image_with_alt(self):
        """The hero image has an alt attribute with the expected text."""
        images = parse_html(load_fixture("sample_page.html"))["images"]
        matches = [img for img in images if "hero" in img["src"]]
        assert len(matches) == 1
        hero = matches[0]
        assert hero["has_alt"] is True
        assert hero["alt"] == "Hero image for testing"

    def test_image_without_alt(self):
        """An image lacking the attribute has has_alt False."""
        images = parse_html(load_fixture("sample_page.html"))["images"]
        matches = [img for img in images if "no-alt" in img["src"]]
        assert len(matches) == 1
        assert matches[0]["has_alt"] is False

    def test_image_with_empty_alt(self):
        """alt="" counts as present (has_alt) but is flagged alt_empty."""
        images = parse_html(load_fixture("sample_page.html"))["images"]
        matches = [img for img in images if "empty-alt" in img["src"]]
        assert len(matches) == 1
        blank = matches[0]
        assert blank["has_alt"] is True
        assert blank["alt_empty"] is True
109
+
110
+
111
class TestLinks:
    """Internal/external link classification when a base_url is supplied."""

    def test_internal_links(self):
        """At least two internal links are found in the fixture."""
        parsed = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
        internal = parsed["links"]["internal"]
        assert len(internal) >= 2

    def test_external_links(self):
        """At least one external link is found in the fixture."""
        parsed = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
        external = parsed["links"]["external"]
        assert len(external) >= 1

    def test_nofollow_detection(self):
        """At least one external link is flagged is_nofollow."""
        parsed = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
        flagged = [link for link in parsed["links"]["external"] if link["is_nofollow"]]
        assert len(flagged) >= 1
124
+
125
+
126
class TestSchema:
    """JSON-LD blocks reported in parse_html's "schema_blocks" list."""

    def test_schema_block_count(self):
        """Both JSON-LD scripts in the fixture are reported."""
        blocks = parse_html(load_fixture("sample_page.html"))["schema_blocks"]
        assert len(blocks) == 2

    def test_schema_types(self):
        """The reported types include Organization and BreadcrumbList."""
        blocks = parse_html(load_fixture("sample_page.html"))["schema_blocks"]
        seen = [block["type"] for block in blocks]
        assert "Organization" in seen
        assert "BreadcrumbList" in seen

    def test_schema_parse_error(self):
        """Invalid JSON still yields one block, typed PARSE_ERROR."""
        document = '<html><body><script type="application/ld+json">{invalid json}</script></body></html>'
        blocks = parse_html(document)["schema_blocks"]
        assert len(blocks) == 1
        assert blocks[0]["type"] == "PARSE_ERROR"
142
+
143
+
144
class TestOpenGraph:
    """Open Graph properties collected in parse_html's "open_graph" mapping."""

    def test_og_title(self):
        """og:title matches the fixture's value."""
        og = parse_html(load_fixture("sample_page.html"))["open_graph"]
        assert og.get("og:title") == "SEO Test Page"

    def test_og_type(self):
        """og:type matches the fixture's value."""
        og = parse_html(load_fixture("sample_page.html"))["open_graph"]
        assert og.get("og:type") == "website"
152
+
153
+
154
class TestHreflang:
    """Alternate-language links reported in parse_html's "hreflang" list."""

    def test_hreflang_count(self):
        """All three alternates from the fixture are reported."""
        alternates = parse_html(load_fixture("sample_page.html"))["hreflang"]
        assert len(alternates) == 3  # en, fr, x-default

    def test_hreflang_languages(self):
        """The reported language codes include en, fr and x-default."""
        alternates = parse_html(load_fixture("sample_page.html"))["hreflang"]
        codes = [entry["lang"] for entry in alternates]
        assert "en" in codes
        assert "fr" in codes
        assert "x-default" in codes
165
+
166
+
167
class TestContentMetrics:
    """Document-level metrics: word count, text ratio, language and size."""

    def test_word_count_positive(self):
        """The fixture's word count exceeds 30."""
        parsed = parse_html(load_fixture("sample_page.html"))
        words = parsed["word_count"]
        assert words > 30

    def test_text_ratio_range(self):
        """text_ratio lies strictly between 0 and 1."""
        parsed = parse_html(load_fixture("sample_page.html"))
        ratio = parsed["text_ratio"]
        assert 0 < ratio < 1

    def test_has_lang_attr(self):
        """The fixture's lang attribute is detected and equals "en"."""
        parsed = parse_html(load_fixture("sample_page.html"))
        assert parsed["has_lang_attr"] is True
        assert parsed["lang"] == "en"

    def test_html_size(self):
        """html_size_bytes equals the UTF-8 encoded length of the input."""
        source = load_fixture("sample_page.html")
        parsed = parse_html(source)
        expected_size = len(source.encode("utf-8"))
        assert parsed["html_size_bytes"] == expected_size
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://json.schemastore.org/package.json",
3
3
  "name": "bmad-plus",
4
- "version": "0.3.2",
4
+ "version": "0.3.3",
5
5
  "description": "BMAD+ — Augmented AI-Driven Development Framework with multi-role agents, autopilot, and parallel execution",
6
6
  "keywords": [
7
7
  "bmad",