bmad-plus 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/oveanet-pack/seo-audit-360/hooks/seo-check.sh +95 -0
- package/oveanet-pack/seo-audit-360/ref/audit-schema.json +187 -0
- package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_crawl.cpython-314.pyc +0 -0
- package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_parse.cpython-314.pyc +0 -0
- package/oveanet-pack/seo-audit-360/tests/__pycache__/test_crawl.cpython-314-pytest-9.0.2.pyc +0 -0
- package/oveanet-pack/seo-audit-360/tests/__pycache__/test_parse.cpython-314-pytest-9.0.2.pyc +0 -0
- package/oveanet-pack/seo-audit-360/tests/fixtures/sample_page.html +62 -0
- package/oveanet-pack/seo-audit-360/tests/test_apis.py +75 -0
- package/oveanet-pack/seo-audit-360/tests/test_crawl.py +121 -0
- package/oveanet-pack/seo-audit-360/tests/test_fetch.py +70 -0
- package/oveanet-pack/seo-audit-360/tests/test_parse.py +184 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,18 @@ All notable changes to BMAD+ will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.3] — 2026-03-19
|
|
9
|
+
|
|
10
|
+
### 🧪 SEO Engine — Quality & Security (Sprint 3)
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **Unit tests** — 50 pytest tests covering all Python scripts (fetch, parse, crawl, APIs)
|
|
14
|
+
- **Pre-commit hook** — `hooks/seo-check.sh` validates HTML for title, meta, alt, H1 before commit
|
|
15
|
+
- **Audit JSON schema** — `ref/audit-schema.json` standardized export format for dashboard/API integration
|
|
16
|
+
- **Test fixture** — `tests/fixtures/sample_page.html` with known SEO elements
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
8
20
|
## [0.3.2] — 2026-03-19
|
|
9
21
|
|
|
10
22
|
### 📊 SEO Engine — Reports, Competitor & Hreflang (Sprint 2)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# SEO Pre-Commit Hook โ Catches common SEO issues before commit.
|
|
3
|
+
# Install: cp hooks/seo-check.sh .git/hooks/pre-commit && chmod +x .git/hooks/pre-commit
|
|
4
|
+
#
|
|
5
|
+
# Author: Laurent Rochetta | BMAD+ SEO Engine
|
|
6
|
+
|
|
7
|
+
ERRORS=0
|
|
8
|
+
WARNINGS=0
|
|
9
|
+
|
|
10
|
+
echo "๐ BMAD+ SEO Pre-Commit Check..."
|
|
11
|
+
|
|
12
|
+
# Only check staged HTML files
|
|
13
|
+
HTML_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep -iE '\.(html|htm|php|jsx|tsx)$')
|
|
14
|
+
|
|
15
|
+
if [ -z "$HTML_FILES" ]; then
|
|
16
|
+
echo " No HTML files staged, skipping SEO check."
|
|
17
|
+
exit 0
|
|
18
|
+
fi
|
|
19
|
+
|
|
20
|
+
for FILE in $HTML_FILES; do
|
|
21
|
+
# Skip node_modules and vendor
|
|
22
|
+
if echo "$FILE" | grep -qE '(node_modules|vendor|dist|build|\.min\.)'; then
|
|
23
|
+
continue
|
|
24
|
+
fi
|
|
25
|
+
|
|
26
|
+
CONTENT=$(git show ":$FILE" 2>/dev/null)
|
|
27
|
+
if [ -z "$CONTENT" ]; then
|
|
28
|
+
continue
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
# Check 1: Missing <title> tag
|
|
32
|
+
if ! echo "$CONTENT" | grep -qi '<title'; then
|
|
33
|
+
echo " ๐ด $FILE โ Missing <title> tag"
|
|
34
|
+
ERRORS=$((ERRORS + 1))
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
# Check 2: Empty <title> tag
|
|
38
|
+
if echo "$CONTENT" | grep -qiE '<title>\s*</title>'; then
|
|
39
|
+
echo " ๐ด $FILE โ Empty <title> tag"
|
|
40
|
+
ERRORS=$((ERRORS + 1))
|
|
41
|
+
fi
|
|
42
|
+
|
|
43
|
+
# Check 3: Missing meta description
|
|
44
|
+
if ! echo "$CONTENT" | grep -qi 'name="description"'; then
|
|
45
|
+
echo " ๐ $FILE โ Missing <meta name=\"description\">"
|
|
46
|
+
WARNINGS=$((WARNINGS + 1))
|
|
47
|
+
fi
|
|
48
|
+
|
|
49
|
+
# Check 4: Images without alt attribute
|
|
50
|
+
IMG_NO_ALT=$(echo "$CONTENT" | grep -ciE '<img[^>]*(?!alt)[^>]*>' 2>/dev/null || echo "0")
|
|
51
|
+
# More reliable: count <img> without alt
|
|
52
|
+
TOTAL_IMGS=$(echo "$CONTENT" | grep -ci '<img' 2>/dev/null || echo "0")
|
|
53
|
+
IMGS_WITH_ALT=$(echo "$CONTENT" | grep -ci '<img[^>]*alt=' 2>/dev/null || echo "0")
|
|
54
|
+
MISSING_ALT=$((TOTAL_IMGS - IMGS_WITH_ALT))
|
|
55
|
+
|
|
56
|
+
if [ "$MISSING_ALT" -gt 0 ]; then
|
|
57
|
+
echo " ๐ $FILE โ $MISSING_ALT image(s) without alt attribute"
|
|
58
|
+
WARNINGS=$((WARNINGS + 1))
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
# Check 5: Multiple H1 tags
|
|
62
|
+
H1_COUNT=$(echo "$CONTENT" | grep -ci '<h1' 2>/dev/null || echo "0")
|
|
63
|
+
if [ "$H1_COUNT" -gt 1 ]; then
|
|
64
|
+
echo " ๐ก $FILE โ Multiple H1 tags ($H1_COUNT found, should be 1)"
|
|
65
|
+
WARNINGS=$((WARNINGS + 1))
|
|
66
|
+
fi
|
|
67
|
+
|
|
68
|
+
# Check 6: No H1 tag at all
|
|
69
|
+
if [ "$H1_COUNT" -eq 0 ]; then
|
|
70
|
+
echo " ๐ $FILE โ No H1 tag found"
|
|
71
|
+
WARNINGS=$((WARNINGS + 1))
|
|
72
|
+
fi
|
|
73
|
+
|
|
74
|
+
# Check 7: "Click here" or "Learn more" anchor text
|
|
75
|
+
BAD_ANCHORS=$(echo "$CONTENT" | grep -ciE '>click here<|>learn more<|>read more<|>here<' 2>/dev/null || echo "0")
|
|
76
|
+
if [ "$BAD_ANCHORS" -gt 0 ]; then
|
|
77
|
+
echo " ๐ก $FILE โ $BAD_ANCHORS link(s) with generic anchor text (\"click here\", \"learn more\")"
|
|
78
|
+
WARNINGS=$((WARNINGS + 1))
|
|
79
|
+
fi
|
|
80
|
+
done
|
|
81
|
+
|
|
82
|
+
echo ""
|
|
83
|
+
echo " Results: $ERRORS error(s), $WARNINGS warning(s)"
|
|
84
|
+
|
|
85
|
+
if [ "$ERRORS" -gt 0 ]; then
|
|
86
|
+
echo " โ Commit blocked โ fix critical SEO issues first!"
|
|
87
|
+
exit 1
|
|
88
|
+
else
|
|
89
|
+
if [ "$WARNINGS" -gt 0 ]; then
|
|
90
|
+
echo " โ ๏ธ Commit allowed with warnings โ consider fixing these issues."
|
|
91
|
+
else
|
|
92
|
+
echo " โ
All SEO checks passed!"
|
|
93
|
+
fi
|
|
94
|
+
exit 0
|
|
95
|
+
fi
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "BMAD+ SEO Audit Result",
|
|
4
|
+
"description": "Standardized JSON format for SEO audit results. Compatible with dashboards, MCP Server, and external tools.",
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"author": "Laurent Rochetta",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"required": ["engine", "version", "domain", "timestamp", "score", "issues"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"engine": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"const": "bmad-seo-engine",
|
|
13
|
+
"description": "Engine identifier"
|
|
14
|
+
},
|
|
15
|
+
"version": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"description": "Engine version",
|
|
18
|
+
"examples": ["2.1.0"]
|
|
19
|
+
},
|
|
20
|
+
"domain": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Audited domain (without protocol)",
|
|
23
|
+
"examples": ["example.com"]
|
|
24
|
+
},
|
|
25
|
+
"url": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"format": "uri",
|
|
28
|
+
"description": "Full URL audited"
|
|
29
|
+
},
|
|
30
|
+
"timestamp": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"format": "date-time",
|
|
33
|
+
"description": "ISO 8601 timestamp of audit completion"
|
|
34
|
+
},
|
|
35
|
+
"business_type": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"enum": ["saas", "ecommerce", "local", "publisher", "agency", "other"],
|
|
38
|
+
"description": "Detected business type"
|
|
39
|
+
},
|
|
40
|
+
"pages_analyzed": {
|
|
41
|
+
"type": "integer",
|
|
42
|
+
"minimum": 1,
|
|
43
|
+
"description": "Number of pages analyzed"
|
|
44
|
+
},
|
|
45
|
+
"score": {
|
|
46
|
+
"type": "object",
|
|
47
|
+
"required": ["total", "categories"],
|
|
48
|
+
"properties": {
|
|
49
|
+
"total": {
|
|
50
|
+
"type": "integer",
|
|
51
|
+
"minimum": 0,
|
|
52
|
+
"maximum": 100,
|
|
53
|
+
"description": "Weighted SEO Health Score"
|
|
54
|
+
},
|
|
55
|
+
"rating": {
|
|
56
|
+
"type": "string",
|
|
57
|
+
"enum": ["excellent", "good", "needs_work", "poor", "critical"],
|
|
58
|
+
"description": "Score interpretation"
|
|
59
|
+
},
|
|
60
|
+
"categories": {
|
|
61
|
+
"type": "object",
|
|
62
|
+
"properties": {
|
|
63
|
+
"technical": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
64
|
+
"content_eeat": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
65
|
+
"on_page": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
66
|
+
"schema": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
67
|
+
"performance": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
68
|
+
"ai_readiness": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
69
|
+
"images": { "type": "integer", "minimum": 0, "maximum": 100 }
|
|
70
|
+
},
|
|
71
|
+
"description": "Score per category (0-100)"
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"issues": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": {
|
|
78
|
+
"type": "object",
|
|
79
|
+
"required": ["id", "severity", "category", "title"],
|
|
80
|
+
"properties": {
|
|
81
|
+
"id": {
|
|
82
|
+
"type": "string",
|
|
83
|
+
"description": "Unique issue identifier",
|
|
84
|
+
"examples": ["missing-meta-description", "multiple-h1"]
|
|
85
|
+
},
|
|
86
|
+
"severity": {
|
|
87
|
+
"type": "string",
|
|
88
|
+
"enum": ["critical", "high", "medium", "low"]
|
|
89
|
+
},
|
|
90
|
+
"category": {
|
|
91
|
+
"type": "string",
|
|
92
|
+
"enum": ["technical", "content", "on_page", "schema", "performance", "geo", "images"]
|
|
93
|
+
},
|
|
94
|
+
"title": {
|
|
95
|
+
"type": "string",
|
|
96
|
+
"description": "Human-readable issue title"
|
|
97
|
+
},
|
|
98
|
+
"description": {
|
|
99
|
+
"type": "string",
|
|
100
|
+
"description": "Detailed explanation"
|
|
101
|
+
},
|
|
102
|
+
"affected_urls": {
|
|
103
|
+
"type": "array",
|
|
104
|
+
"items": { "type": "string", "format": "uri" },
|
|
105
|
+
"description": "Pages affected by this issue"
|
|
106
|
+
},
|
|
107
|
+
"fix": {
|
|
108
|
+
"type": "string",
|
|
109
|
+
"description": "Auto-generated fix code (HTML, JSON-LD, etc.)"
|
|
110
|
+
},
|
|
111
|
+
"quick_win": {
|
|
112
|
+
"type": "boolean",
|
|
113
|
+
"description": "True if high impact / low effort"
|
|
114
|
+
},
|
|
115
|
+
"impact": {
|
|
116
|
+
"type": "string",
|
|
117
|
+
"enum": ["high", "medium", "low"]
|
|
118
|
+
},
|
|
119
|
+
"effort": {
|
|
120
|
+
"type": "string",
|
|
121
|
+
"enum": ["high", "medium", "low"]
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
},
|
|
126
|
+
"pages": {
|
|
127
|
+
"type": "array",
|
|
128
|
+
"items": {
|
|
129
|
+
"type": "object",
|
|
130
|
+
"properties": {
|
|
131
|
+
"url": { "type": "string", "format": "uri" },
|
|
132
|
+
"status": { "type": "integer" },
|
|
133
|
+
"title": { "type": "string" },
|
|
134
|
+
"word_count": { "type": "integer" },
|
|
135
|
+
"schema_types": { "type": "array", "items": { "type": "string" } },
|
|
136
|
+
"eeat_score": { "type": "integer", "minimum": 0, "maximum": 100 }
|
|
137
|
+
}
|
|
138
|
+
},
|
|
139
|
+
"description": "Per-page analysis data"
|
|
140
|
+
},
|
|
141
|
+
"geo": {
|
|
142
|
+
"type": "object",
|
|
143
|
+
"properties": {
|
|
144
|
+
"ai_readiness_score": { "type": "integer", "minimum": 0, "maximum": 100 },
|
|
145
|
+
"ai_crawlers_allowed": { "type": "array", "items": { "type": "string" } },
|
|
146
|
+
"has_llms_txt": { "type": "boolean" },
|
|
147
|
+
"citability_score": { "type": "integer", "minimum": 0, "maximum": 100 }
|
|
148
|
+
},
|
|
149
|
+
"description": "GEO / AI search readiness metrics"
|
|
150
|
+
},
|
|
151
|
+
"pagespeed": {
|
|
152
|
+
"type": "object",
|
|
153
|
+
"properties": {
|
|
154
|
+
"mobile": {
|
|
155
|
+
"type": "object",
|
|
156
|
+
"properties": {
|
|
157
|
+
"performance": { "type": "integer" },
|
|
158
|
+
"accessibility": { "type": "integer" },
|
|
159
|
+
"best_practices": { "type": "integer" },
|
|
160
|
+
"seo": { "type": "integer" }
|
|
161
|
+
}
|
|
162
|
+
},
|
|
163
|
+
"desktop": {
|
|
164
|
+
"type": "object",
|
|
165
|
+
"properties": {
|
|
166
|
+
"performance": { "type": "integer" },
|
|
167
|
+
"accessibility": { "type": "integer" },
|
|
168
|
+
"best_practices": { "type": "integer" },
|
|
169
|
+
"seo": { "type": "integer" }
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
},
|
|
173
|
+
"description": "PageSpeed Insights scores"
|
|
174
|
+
},
|
|
175
|
+
"monitoring": {
|
|
176
|
+
"type": "object",
|
|
177
|
+
"properties": {
|
|
178
|
+
"previous_score": { "type": "integer" },
|
|
179
|
+
"previous_date": { "type": "string", "format": "date" },
|
|
180
|
+
"delta": { "type": "integer" },
|
|
181
|
+
"issues_resolved": { "type": "integer" },
|
|
182
|
+
"issues_new": { "type": "integer" }
|
|
183
|
+
},
|
|
184
|
+
"description": "Comparison with previous audit (if available)"
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<title>SEO Test Page โ BMAD+ Fixture</title>
|
|
6
|
+
<meta name="description" content="A test page for validating the SEO parse module with known elements.">
|
|
7
|
+
<meta name="robots" content="index, follow">
|
|
8
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
9
|
+
<meta property="og:title" content="SEO Test Page">
|
|
10
|
+
<meta property="og:type" content="website">
|
|
11
|
+
<meta property="og:url" content="https://example.com/test">
|
|
12
|
+
<meta name="twitter:card" content="summary_large_image">
|
|
13
|
+
<meta name="twitter:title" content="SEO Test Page">
|
|
14
|
+
<link rel="canonical" href="https://example.com/test">
|
|
15
|
+
<link rel="alternate" hreflang="en" href="https://example.com/en/test">
|
|
16
|
+
<link rel="alternate" hreflang="fr" href="https://example.com/fr/test">
|
|
17
|
+
<link rel="alternate" hreflang="x-default" href="https://example.com/test">
|
|
18
|
+
</head>
|
|
19
|
+
<body>
|
|
20
|
+
<h1>Main Heading of the Page</h1>
|
|
21
|
+
<p>This is a test paragraph with enough words to verify word count functionality in the parser module. We need at least a few sentences to make the test meaningful and realistic.</p>
|
|
22
|
+
|
|
23
|
+
<h2>Second Level Heading One</h2>
|
|
24
|
+
<p>Content under the first H2. This paragraph adds more text to increase the word count.</p>
|
|
25
|
+
|
|
26
|
+
<h2>Second Level Heading Two</h2>
|
|
27
|
+
<p>Another section with different content about SEO analysis and testing.</p>
|
|
28
|
+
|
|
29
|
+
<h3>Third Level Heading</h3>
|
|
30
|
+
<p>Detailed information under the H3 heading for testing hierarchy detection.</p>
|
|
31
|
+
|
|
32
|
+
<img src="/images/hero.jpg" alt="Hero image for testing" width="800" height="400" loading="lazy">
|
|
33
|
+
<img src="/images/no-alt.jpg" width="200" height="200">
|
|
34
|
+
<img src="/images/empty-alt.jpg" alt="" width="100" height="100">
|
|
35
|
+
|
|
36
|
+
<a href="https://example.com/about">About Us</a>
|
|
37
|
+
<a href="https://example.com/services">Our Services</a>
|
|
38
|
+
<a href="https://external.com/partner" rel="nofollow" target="_blank">Partner Link</a>
|
|
39
|
+
<a href="/relative-link">Relative Link</a>
|
|
40
|
+
|
|
41
|
+
<script type="application/ld+json">
|
|
42
|
+
{
|
|
43
|
+
"@context": "https://schema.org",
|
|
44
|
+
"@type": "Organization",
|
|
45
|
+
"name": "Test Company",
|
|
46
|
+
"url": "https://example.com",
|
|
47
|
+
"logo": "https://example.com/logo.png"
|
|
48
|
+
}
|
|
49
|
+
</script>
|
|
50
|
+
|
|
51
|
+
<script type="application/ld+json">
|
|
52
|
+
{
|
|
53
|
+
"@context": "https://schema.org",
|
|
54
|
+
"@type": "BreadcrumbList",
|
|
55
|
+
"itemListElement": [
|
|
56
|
+
{"@type": "ListItem", "position": 1, "name": "Home", "item": "https://example.com"},
|
|
57
|
+
{"@type": "ListItem", "position": 2, "name": "Test", "item": "https://example.com/test"}
|
|
58
|
+
]
|
|
59
|
+
}
|
|
60
|
+
</script>
|
|
61
|
+
</body>
|
|
62
|
+
</html>
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for seo_apis.py โ API response parsing and error handling.
|
|
3
|
+
|
|
4
|
+
Author: Laurent Rochetta
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
+
|
|
11
|
+
# Temporarily unset API key for error tests
|
|
12
|
+
original_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestAPIKeyMissing:
|
|
16
|
+
"""Test behavior when GOOGLE_API_KEY is not set."""
|
|
17
|
+
|
|
18
|
+
def setup_method(self):
|
|
19
|
+
os.environ.pop("GOOGLE_API_KEY", None)
|
|
20
|
+
# Reimport to pick up empty key
|
|
21
|
+
import importlib
|
|
22
|
+
import seo_apis
|
|
23
|
+
importlib.reload(seo_apis)
|
|
24
|
+
self.seo_apis = seo_apis
|
|
25
|
+
|
|
26
|
+
def teardown_method(self):
|
|
27
|
+
if original_key:
|
|
28
|
+
os.environ["GOOGLE_API_KEY"] = original_key
|
|
29
|
+
|
|
30
|
+
def test_pagespeed_without_key(self):
|
|
31
|
+
# Force the module to use an empty key
|
|
32
|
+
self.seo_apis.API_KEY = ""
|
|
33
|
+
result = self.seo_apis.run_pagespeed("https://example.com")
|
|
34
|
+
assert result.get("error") is not None
|
|
35
|
+
assert "GOOGLE_API_KEY" in result["error"]
|
|
36
|
+
|
|
37
|
+
def test_crux_without_key(self):
|
|
38
|
+
self.seo_apis.API_KEY = ""
|
|
39
|
+
result = self.seo_apis.run_crux("https://example.com")
|
|
40
|
+
assert result.get("error") is not None
|
|
41
|
+
|
|
42
|
+
def test_rich_results_without_key(self):
|
|
43
|
+
self.seo_apis.API_KEY = ""
|
|
44
|
+
result = self.seo_apis.run_rich_results_test("https://example.com")
|
|
45
|
+
assert result.get("error") is not None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TestResultStructure:
|
|
49
|
+
"""Test that API functions return expected structures."""
|
|
50
|
+
|
|
51
|
+
def setup_method(self):
|
|
52
|
+
import importlib
|
|
53
|
+
import seo_apis
|
|
54
|
+
importlib.reload(seo_apis)
|
|
55
|
+
self.seo_apis = seo_apis
|
|
56
|
+
|
|
57
|
+
def test_pagespeed_result_keys(self):
|
|
58
|
+
self.seo_apis.API_KEY = ""
|
|
59
|
+
result = self.seo_apis.run_pagespeed("https://example.com")
|
|
60
|
+
# Even on error, should have expected structure
|
|
61
|
+
assert "error" in result
|
|
62
|
+
|
|
63
|
+
def test_crux_result_keys(self):
|
|
64
|
+
self.seo_apis.API_KEY = ""
|
|
65
|
+
result = self.seo_apis.run_crux("https://example.com")
|
|
66
|
+
assert "error" in result
|
|
67
|
+
|
|
68
|
+
def test_run_all_structure(self):
|
|
69
|
+
self.seo_apis.API_KEY = ""
|
|
70
|
+
result = self.seo_apis.run_all("https://example.com")
|
|
71
|
+
assert "pagespeed" in result
|
|
72
|
+
assert "crux" in result
|
|
73
|
+
assert "mobile_friendly" in result
|
|
74
|
+
assert "url" in result
|
|
75
|
+
assert "timestamp" in result
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for seo_crawl.py โ URL normalization, link extraction, depth limiting.
|
|
3
|
+
|
|
4
|
+
Author: Laurent Rochetta
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
+
|
|
11
|
+
from seo_crawl import SEOCrawler
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestURLNormalization:
|
|
15
|
+
"""Test URL normalization for deduplication."""
|
|
16
|
+
|
|
17
|
+
def setup_method(self):
|
|
18
|
+
self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
|
|
19
|
+
|
|
20
|
+
def test_strips_trailing_slash(self):
|
|
21
|
+
assert self.crawler.normalize_url("https://example.com/page/") == "https://example.com/page"
|
|
22
|
+
|
|
23
|
+
def test_preserves_root(self):
|
|
24
|
+
assert self.crawler.normalize_url("https://example.com/") == "https://example.com/"
|
|
25
|
+
|
|
26
|
+
def test_normalizes_scheme(self):
|
|
27
|
+
result = self.crawler.normalize_url("https://example.com/page")
|
|
28
|
+
assert result.startswith("https://")
|
|
29
|
+
|
|
30
|
+
def test_deduplicates(self):
|
|
31
|
+
url1 = self.crawler.normalize_url("https://example.com/page/")
|
|
32
|
+
url2 = self.crawler.normalize_url("https://example.com/page")
|
|
33
|
+
assert url1 == url2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TestInternalDetection:
|
|
37
|
+
def setup_method(self):
|
|
38
|
+
self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
|
|
39
|
+
|
|
40
|
+
def test_internal_url(self):
|
|
41
|
+
assert self.crawler.is_internal("https://example.com/about") is True
|
|
42
|
+
|
|
43
|
+
def test_external_url(self):
|
|
44
|
+
assert self.crawler.is_internal("https://other.com/page") is False
|
|
45
|
+
|
|
46
|
+
def test_subdomain_is_external(self):
|
|
47
|
+
assert self.crawler.is_internal("https://blog.example.com/post") is False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TestLinkExtraction:
|
|
51
|
+
def setup_method(self):
|
|
52
|
+
self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
|
|
53
|
+
|
|
54
|
+
def test_extracts_internal_links(self):
|
|
55
|
+
html = '''
|
|
56
|
+
<a href="/about">About</a>
|
|
57
|
+
<a href="https://example.com/contact">Contact</a>
|
|
58
|
+
'''
|
|
59
|
+
links = self.crawler.extract_links(html, "https://example.com/")
|
|
60
|
+
assert len(links) == 2
|
|
61
|
+
|
|
62
|
+
def test_ignores_external_links(self):
|
|
63
|
+
html = '<a href="https://other.com/page">External</a>'
|
|
64
|
+
links = self.crawler.extract_links(html, "https://example.com/")
|
|
65
|
+
assert len(links) == 0
|
|
66
|
+
|
|
67
|
+
def test_ignores_anchors(self):
|
|
68
|
+
html = '<a href="#section">Anchor</a>'
|
|
69
|
+
links = self.crawler.extract_links(html, "https://example.com/")
|
|
70
|
+
assert len(links) == 0
|
|
71
|
+
|
|
72
|
+
def test_ignores_javascript(self):
|
|
73
|
+
html = '<a href="javascript:void(0)">JS Link</a>'
|
|
74
|
+
links = self.crawler.extract_links(html, "https://example.com/")
|
|
75
|
+
assert len(links) == 0
|
|
76
|
+
|
|
77
|
+
def test_ignores_mailto(self):
|
|
78
|
+
html = '<a href="mailto:test@example.com">Email</a>'
|
|
79
|
+
links = self.crawler.extract_links(html, "https://example.com/")
|
|
80
|
+
assert len(links) == 0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TestTitleExtraction:
|
|
84
|
+
def setup_method(self):
|
|
85
|
+
self.crawler = SEOCrawler("https://example.com")
|
|
86
|
+
|
|
87
|
+
def test_extracts_title(self):
|
|
88
|
+
html = "<html><head><title>Test Page</title></head><body></body></html>"
|
|
89
|
+
assert self.crawler.extract_title(html) == "Test Page"
|
|
90
|
+
|
|
91
|
+
def test_missing_title(self):
|
|
92
|
+
html = "<html><body></body></html>"
|
|
93
|
+
assert self.crawler.extract_title(html) is None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TestWordCount:
|
|
97
|
+
def setup_method(self):
|
|
98
|
+
self.crawler = SEOCrawler("https://example.com")
|
|
99
|
+
|
|
100
|
+
def test_counts_visible_words(self):
|
|
101
|
+
html = "<html><body><p>This is a test with seven words.</p></body></html>"
|
|
102
|
+
assert self.crawler.count_words(html) == 7
|
|
103
|
+
|
|
104
|
+
def test_excludes_script_content(self):
|
|
105
|
+
html = '<html><body><p>Visible</p><script>var hidden = true;</script></body></html>'
|
|
106
|
+
count = self.crawler.count_words(html)
|
|
107
|
+
assert count == 1 # Only "Visible"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class TestCrawlerConfig:
|
|
111
|
+
def test_max_pages_respected(self):
|
|
112
|
+
crawler = SEOCrawler("https://example.com", max_pages=5)
|
|
113
|
+
assert crawler.max_pages == 5
|
|
114
|
+
|
|
115
|
+
def test_max_depth_respected(self):
|
|
116
|
+
crawler = SEOCrawler("https://example.com", max_depth=1)
|
|
117
|
+
assert crawler.max_depth == 1
|
|
118
|
+
|
|
119
|
+
def test_base_domain_extracted(self):
|
|
120
|
+
crawler = SEOCrawler("https://www.example.com/page")
|
|
121
|
+
assert crawler.base_domain == "www.example.com"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for seo_fetch.py โ SSRF protection, URL handling, error cases.
|
|
3
|
+
|
|
4
|
+
Author: Laurent Rochetta
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
+
|
|
11
|
+
from seo_fetch import is_safe_url, fetch_page
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestSSRFProtection:
|
|
15
|
+
"""Test SSRF prevention blocks private/loopback/reserved IPs."""
|
|
16
|
+
|
|
17
|
+
def test_blocks_localhost(self):
|
|
18
|
+
assert is_safe_url("http://127.0.0.1/admin") is False
|
|
19
|
+
|
|
20
|
+
def test_blocks_private_10(self):
|
|
21
|
+
assert is_safe_url("http://10.0.0.1/secret") is False
|
|
22
|
+
|
|
23
|
+
def test_blocks_private_192(self):
|
|
24
|
+
assert is_safe_url("http://192.168.1.1/") is False
|
|
25
|
+
|
|
26
|
+
def test_blocks_private_172(self):
|
|
27
|
+
assert is_safe_url("http://172.16.0.1/") is False
|
|
28
|
+
|
|
29
|
+
def test_allows_public_ip(self):
|
|
30
|
+
assert is_safe_url("https://93.184.216.34/") is True
|
|
31
|
+
|
|
32
|
+
def test_allows_public_domain(self):
|
|
33
|
+
assert is_safe_url("https://example.com/") is True
|
|
34
|
+
|
|
35
|
+
def test_blocks_empty_hostname(self):
|
|
36
|
+
assert is_safe_url("http:///nohost") is False
|
|
37
|
+
|
|
38
|
+
def test_blocks_zero_ip(self):
|
|
39
|
+
assert is_safe_url("http://0.0.0.0/") is False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TestFetchPage:
|
|
43
|
+
"""Test fetch_page function behavior."""
|
|
44
|
+
|
|
45
|
+
def test_normalizes_url_without_scheme(self):
|
|
46
|
+
result = fetch_page("example.com", timeout=5)
|
|
47
|
+
assert result["url"] == "example.com"
|
|
48
|
+
# Should have attempted https://example.com
|
|
49
|
+
|
|
50
|
+
def test_blocks_invalid_scheme(self):
|
|
51
|
+
result = fetch_page("ftp://example.com/file")
|
|
52
|
+
assert result["error"] is not None
|
|
53
|
+
assert "Invalid URL scheme" in result["error"]
|
|
54
|
+
|
|
55
|
+
def test_blocks_ssrf(self):
|
|
56
|
+
result = fetch_page("http://127.0.0.1/admin")
|
|
57
|
+
assert result["error"] is not None
|
|
58
|
+
assert "Blocked" in result["error"]
|
|
59
|
+
|
|
60
|
+
def test_result_structure(self):
|
|
61
|
+
"""Verify the result dict has all expected keys."""
|
|
62
|
+
result = fetch_page("https://example.com", timeout=5)
|
|
63
|
+
expected_keys = {"url", "final_url", "status_code", "content", "headers",
|
|
64
|
+
"redirect_chain", "content_length", "response_time_ms", "error"}
|
|
65
|
+
assert expected_keys == set(result.keys())
|
|
66
|
+
|
|
67
|
+
def test_timeout_returns_error(self):
|
|
68
|
+
# Use a non-routable IP to force timeout
|
|
69
|
+
result = fetch_page("http://192.0.2.1/", timeout=1)
|
|
70
|
+
assert result["error"] is not None
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for seo_parse.py โ HTML parsing and SEO element extraction.
|
|
3
|
+
|
|
4
|
+
Author: Laurent Rochetta
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
12
|
+
|
|
13
|
+
from seo_parse import parse_html
|
|
14
|
+
|
|
15
|
+
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_fixture(name: str) -> str:
|
|
19
|
+
with open(os.path.join(FIXTURES_DIR, name), "r", encoding="utf-8") as f:
|
|
20
|
+
return f.read()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestTitleParsing:
|
|
24
|
+
def test_extracts_title(self):
|
|
25
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
26
|
+
assert result["title"] == "SEO Test Page โ BMAD+ Fixture"
|
|
27
|
+
|
|
28
|
+
def test_title_length(self):
|
|
29
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
30
|
+
assert result["title_length"] == len("SEO Test Page โ BMAD+ Fixture")
|
|
31
|
+
|
|
32
|
+
def test_missing_title(self):
|
|
33
|
+
result = parse_html("<html><body><p>No title</p></body></html>")
|
|
34
|
+
assert result["title"] is None
|
|
35
|
+
assert result["title_length"] == 0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestMetaTags:
|
|
39
|
+
def test_extracts_description(self):
|
|
40
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
41
|
+
assert "test page" in result["meta_description"].lower()
|
|
42
|
+
|
|
43
|
+
def test_extracts_robots(self):
|
|
44
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
45
|
+
assert result["meta_robots"] == "index, follow"
|
|
46
|
+
|
|
47
|
+
def test_extracts_viewport(self):
|
|
48
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
49
|
+
assert "width=device-width" in result["meta_viewport"]
|
|
50
|
+
|
|
51
|
+
def test_missing_description(self):
|
|
52
|
+
result = parse_html("<html><head><title>T</title></head><body></body></html>")
|
|
53
|
+
assert result["meta_description"] is None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestCanonical:
|
|
57
|
+
def test_extracts_canonical(self):
|
|
58
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
59
|
+
assert result["canonical"] == "https://example.com/test"
|
|
60
|
+
|
|
61
|
+
def test_missing_canonical(self):
|
|
62
|
+
result = parse_html("<html><body></body></html>")
|
|
63
|
+
assert result["canonical"] is None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestHeadings:
|
|
67
|
+
def test_h1_count(self):
|
|
68
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
69
|
+
assert len(result["headings"]["h1"]) == 1
|
|
70
|
+
|
|
71
|
+
def test_h2_count(self):
|
|
72
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
73
|
+
assert len(result["headings"]["h2"]) == 2
|
|
74
|
+
|
|
75
|
+
def test_h3_count(self):
|
|
76
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
77
|
+
assert len(result["headings"]["h3"]) == 1
|
|
78
|
+
|
|
79
|
+
def test_multiple_h1_detection(self):
|
|
80
|
+
html = "<html><body><h1>First</h1><h1>Second</h1></body></html>"
|
|
81
|
+
result = parse_html(html)
|
|
82
|
+
assert len(result["headings"]["h1"]) == 2
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TestImages:
|
|
86
|
+
def test_image_count(self):
|
|
87
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
88
|
+
assert len(result["images"]) == 3
|
|
89
|
+
|
|
90
|
+
def test_image_with_alt(self):
|
|
91
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
92
|
+
hero = [i for i in result["images"] if "hero" in i["src"]]
|
|
93
|
+
assert len(hero) == 1
|
|
94
|
+
assert hero[0]["has_alt"] is True
|
|
95
|
+
assert hero[0]["alt"] == "Hero image for testing"
|
|
96
|
+
|
|
97
|
+
def test_image_without_alt(self):
|
|
98
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
99
|
+
no_alt = [i for i in result["images"] if "no-alt" in i["src"]]
|
|
100
|
+
assert len(no_alt) == 1
|
|
101
|
+
assert no_alt[0]["has_alt"] is False
|
|
102
|
+
|
|
103
|
+
def test_image_with_empty_alt(self):
|
|
104
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
105
|
+
empty = [i for i in result["images"] if "empty-alt" in i["src"]]
|
|
106
|
+
assert len(empty) == 1
|
|
107
|
+
assert empty[0]["has_alt"] is True
|
|
108
|
+
assert empty[0]["alt_empty"] is True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class TestLinks:
|
|
112
|
+
def test_internal_links(self):
|
|
113
|
+
result = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
|
|
114
|
+
assert len(result["links"]["internal"]) >= 2
|
|
115
|
+
|
|
116
|
+
def test_external_links(self):
|
|
117
|
+
result = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
|
|
118
|
+
assert len(result["links"]["external"]) >= 1
|
|
119
|
+
|
|
120
|
+
def test_nofollow_detection(self):
|
|
121
|
+
result = parse_html(load_fixture("sample_page.html"), base_url="https://example.com")
|
|
122
|
+
nofollow = [l for l in result["links"]["external"] if l["is_nofollow"]]
|
|
123
|
+
assert len(nofollow) >= 1
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class TestSchema:
|
|
127
|
+
def test_schema_block_count(self):
|
|
128
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
129
|
+
assert len(result["schema_blocks"]) == 2
|
|
130
|
+
|
|
131
|
+
def test_schema_types(self):
|
|
132
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
133
|
+
types = [s["type"] for s in result["schema_blocks"]]
|
|
134
|
+
assert "Organization" in types
|
|
135
|
+
assert "BreadcrumbList" in types
|
|
136
|
+
|
|
137
|
+
def test_schema_parse_error(self):
|
|
138
|
+
html = '<html><body><script type="application/ld+json">{invalid json}</script></body></html>'
|
|
139
|
+
result = parse_html(html)
|
|
140
|
+
assert len(result["schema_blocks"]) == 1
|
|
141
|
+
assert result["schema_blocks"][0]["type"] == "PARSE_ERROR"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class TestOpenGraph:
|
|
145
|
+
def test_og_title(self):
|
|
146
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
147
|
+
assert result["open_graph"].get("og:title") == "SEO Test Page"
|
|
148
|
+
|
|
149
|
+
def test_og_type(self):
|
|
150
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
151
|
+
assert result["open_graph"].get("og:type") == "website"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class TestHreflang:
|
|
155
|
+
def test_hreflang_count(self):
|
|
156
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
157
|
+
assert len(result["hreflang"]) == 3 # en, fr, x-default
|
|
158
|
+
|
|
159
|
+
def test_hreflang_languages(self):
|
|
160
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
161
|
+
langs = [h["lang"] for h in result["hreflang"]]
|
|
162
|
+
assert "en" in langs
|
|
163
|
+
assert "fr" in langs
|
|
164
|
+
assert "x-default" in langs
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class TestContentMetrics:
|
|
168
|
+
def test_word_count_positive(self):
|
|
169
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
170
|
+
assert result["word_count"] > 30
|
|
171
|
+
|
|
172
|
+
def test_text_ratio_range(self):
|
|
173
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
174
|
+
assert 0 < result["text_ratio"] < 1
|
|
175
|
+
|
|
176
|
+
def test_has_lang_attr(self):
|
|
177
|
+
result = parse_html(load_fixture("sample_page.html"))
|
|
178
|
+
assert result["has_lang_attr"] is True
|
|
179
|
+
assert result["lang"] == "en"
|
|
180
|
+
|
|
181
|
+
def test_html_size(self):
|
|
182
|
+
html = load_fixture("sample_page.html")
|
|
183
|
+
result = parse_html(html)
|
|
184
|
+
assert result["html_size_bytes"] == len(html.encode("utf-8"))
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://json.schemastore.org/package.json",
|
|
3
3
|
"name": "bmad-plus",
|
|
4
|
-
"version": "0.3.2",
|
|
4
|
+
"version": "0.3.3",
|
|
5
5
|
"description": "BMAD+ โ Augmented AI-Driven Development Framework with multi-role agents, autopilot, and parallel execution",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"bmad",
|