bmad-plus 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.md +13 -56
- package/osint-agent-package/skills/bmad-osint-investigate/osint/SKILL.md +452 -452
- package/osint-agent-package/skills/bmad-osint-investigate/osint/assets/dossier-template.md +116 -116
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/content-extraction.md +100 -100
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/platforms.md +130 -130
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/psychoprofile.md +69 -69
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/tools.md +281 -281
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/mcp-client.py +136 -136
- package/package.json +1 -1
- package/readme-international/README.de.md +1 -1
- package/readme-international/README.es.md +1 -1
- package/readme-international/README.fr.md +1 -1
- package/tools/cli/commands/install.js +88 -59
- package/tools/cli/i18n.js +501 -0
- package/oveanet-pack/animated-website/DEPLOYMENT.md +0 -104
- package/oveanet-pack/animated-website/README.md +0 -63
- package/oveanet-pack/animated-website/agent.yaml +0 -63
- package/oveanet-pack/seo-audit-360/DEPLOYMENT.md +0 -115
- package/oveanet-pack/seo-audit-360/README.md +0 -66
- package/oveanet-pack/seo-audit-360/agent.yaml +0 -70
- package/oveanet-pack/seo-audit-360/extensions/google-analytics/EXTENSION.md +0 -79
- package/oveanet-pack/seo-audit-360/extensions/google-analytics/ga4_client.py +0 -200
- package/oveanet-pack/seo-audit-360/extensions/google-analytics/requirements.txt +0 -4
- package/oveanet-pack/seo-audit-360/extensions/google-search-console/EXTENSION.md +0 -109
- package/oveanet-pack/seo-audit-360/extensions/google-search-console/gsc_client.py +0 -186
- package/oveanet-pack/seo-audit-360/extensions/google-search-console/requirements.txt +0 -4
- package/oveanet-pack/seo-audit-360/hooks/seo-check.sh +0 -95
- package/oveanet-pack/seo-audit-360/requirements.txt +0 -14
- package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_crawl.cpython-314.pyc +0 -0
- package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_parse.cpython-314.pyc +0 -0
- package/oveanet-pack/seo-audit-360/scripts/install.ps1 +0 -53
- package/oveanet-pack/seo-audit-360/scripts/install.sh +0 -48
- package/oveanet-pack/seo-audit-360/scripts/seo_apis.py +0 -464
- package/oveanet-pack/seo-audit-360/scripts/seo_crawl.py +0 -282
- package/oveanet-pack/seo-audit-360/scripts/seo_fetch.py +0 -231
- package/oveanet-pack/seo-audit-360/scripts/seo_parse.py +0 -255
- package/oveanet-pack/seo-audit-360/scripts/seo_report.py +0 -403
- package/oveanet-pack/seo-audit-360/scripts/seo_screenshot.py +0 -202
- package/oveanet-pack/seo-audit-360/tests/__pycache__/test_crawl.cpython-314-pytest-9.0.2.pyc +0 -0
- package/oveanet-pack/seo-audit-360/tests/__pycache__/test_parse.cpython-314-pytest-9.0.2.pyc +0 -0
- package/oveanet-pack/seo-audit-360/tests/fixtures/sample_page.html +0 -62
- package/oveanet-pack/seo-audit-360/tests/test_apis.py +0 -75
- package/oveanet-pack/seo-audit-360/tests/test_crawl.py +0 -121
- package/oveanet-pack/seo-audit-360/tests/test_fetch.py +0 -70
- package/oveanet-pack/seo-audit-360/tests/test_parse.py +0 -184
- package/oveanet-pack/universal-backup/DEPLOYMENT.md +0 -80
- package/oveanet-pack/universal-backup/README.md +0 -58
- package/oveanet-pack/universal-backup/agent.yaml +0 -45
- /package/{oveanet-pack/animated-website/agent → src/bmad-plus/agents/pack-animated}/animated-website-agent.md +0 -0
- /package/{oveanet-pack/animated-website → src/bmad-plus/agents/pack-animated}/templates/animated-website-workflow.md +0 -0
- /package/{oveanet-pack/universal-backup/agent → src/bmad-plus/agents/pack-backup}/backup-agent.md +0 -0
- /package/{oveanet-pack/universal-backup → src/bmad-plus/agents/pack-backup}/templates/backup-workflow.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/SKILL.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/checklist.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/pagespeed-playbook.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/audit-schema.json +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/cwv-thresholds.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/eeat-criteria.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/geo-signals.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/hreflang-rules.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/quality-gates.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/schema-catalog.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/schema-templates.json +0 -0
- /package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-chief.md +0 -0
- /package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-judge.md +0 -0
- /package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-scout.md +0 -0
- /package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/templates/seo-audit-workflow.md +0 -0
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
SEO Screenshot — Viewport screenshot capture for visual SEO analysis.
|
|
4
|
-
|
|
5
|
-
Features:
|
|
6
|
-
- Mobile and desktop viewport presets
|
|
7
|
-
- Above-the-fold element detection
|
|
8
|
-
- Full-page capture option
|
|
9
|
-
- PNG output with configurable quality
|
|
10
|
-
|
|
11
|
-
Requires: playwright (pip install playwright && playwright install chromium)
|
|
12
|
-
|
|
13
|
-
Author: Laurent Rochetta
|
|
14
|
-
License: MIT
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
import argparse
|
|
18
|
-
import sys
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# Named viewport presets. Each preset supplies the viewport size in CSS pixels,
# the device scale factor, and whether the browser context emulates a mobile
# device.
VIEWPORTS = {
    "mobile": {"width": 375, "height": 812, "device_scale_factor": 3, "is_mobile": True},
    "tablet": {"width": 768, "height": 1024, "device_scale_factor": 2, "is_mobile": True},
    "desktop": {"width": 1440, "height": 900, "device_scale_factor": 1, "is_mobile": False},
    "desktop-hd": {"width": 1920, "height": 1080, "device_scale_factor": 1, "is_mobile": False},
}

# User-agent sent for every preset whose "is_mobile" flag is True
# (this includes "tablet").
_MOBILE_USER_AGENT = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
    "AppleWebKit/605.1.15 Mobile/15E148 Safari/604.1"
)

# User-agent sent for the non-mobile presets.
_DESKTOP_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 BMADSEOEngine/2.0"
)

# Script evaluated inside the page to collect above-the-fold metrics.
# It returns a plain object, which Playwright hands back as a Python dict.
_ABOVE_FOLD_METRICS_JS = """() => {
    const viewportHeight = window.innerHeight;
    const viewportWidth = window.innerWidth;

    // Find CTAs above the fold
    const ctas = [];
    const buttons = document.querySelectorAll('a, button, [role="button"]');
    buttons.forEach(el => {
        const rect = el.getBoundingClientRect();
        if (rect.top < viewportHeight && rect.bottom > 0) {
            const text = el.textContent.trim().substring(0, 50);
            if (text && (
                /sign.?up|get.?start|try|buy|contact|demo|free|download|subscribe/i.test(text)
            )) {
                ctas.push({
                    text: text,
                    tag: el.tagName,
                    top: Math.round(rect.top),
                    visible: rect.width > 0 && rect.height > 0,
                });
            }
        }
    });

    // Find hero/LCP candidate (same vertical-overlap test as the CTA scan)
    const images = document.querySelectorAll('img');
    let largestImage = null;
    let largestArea = 0;
    images.forEach(img => {
        const rect = img.getBoundingClientRect();
        const area = rect.width * rect.height;
        if (area > largestArea && rect.top < viewportHeight && rect.bottom > 0) {
            largestArea = area;
            largestImage = {
                src: img.src.substring(0, 100),
                width: Math.round(rect.width),
                height: Math.round(rect.height),
                top: Math.round(rect.top),
            };
        }
    });

    // Check for horizontal scroll
    const hasHorizontalScroll = document.documentElement.scrollWidth > viewportWidth;

    // Font size check
    const body = document.body;
    const bodyFontSize = body ? parseFloat(getComputedStyle(body).fontSize) : 16;

    return {
        viewportWidth,
        viewportHeight,
        ctas_above_fold: ctas.length,
        cta_details: ctas.slice(0, 5),
        largest_image_above_fold: largestImage,
        has_horizontal_scroll: hasHorizontalScroll,
        body_font_size_px: bodyFontSize,
        dom_element_count: document.querySelectorAll('*').length,
    };
}"""


def capture_screenshot(
    url: str,
    output: str = "screenshot.png",
    viewport: str = "desktop",
    full_page: bool = False,
    wait_ms: int = 2000,
) -> dict:
    """
    Capture a screenshot of a URL with Playwright and gather page metrics.

    Args:
        url: URL to capture
        output: Output file path (.png)
        viewport: Viewport preset (mobile, tablet, desktop, desktop-hd);
            an unknown name falls back to the "desktop" preset
        full_page: Capture full page scroll or just viewport
        wait_ms: Wait time after page load (ms)

    Returns:
        A dict with the keys viewportWidth, viewportHeight, ctas_above_fold,
        cta_details (at most 5 entries), largest_image_above_fold (dict or
        None), has_horizontal_scroll, body_font_size_px and dom_element_count.

    Raises:
        SystemExit: with status 1 when the playwright package is not installed.
    """
    try:
        from playwright.sync_api import Error as PlaywrightError
        from playwright.sync_api import sync_playwright
    except ImportError:
        print(
            "Error: playwright required.\n"
            "Install: pip install playwright && playwright install chromium",
            file=sys.stderr,
        )
        sys.exit(1)

    vp = VIEWPORTS.get(viewport, VIEWPORTS["desktop"])
    user_agent = _MOBILE_USER_AGENT if vp["is_mobile"] else _DESKTOP_USER_AGENT

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                viewport={"width": vp["width"], "height": vp["height"]},
                device_scale_factor=vp["device_scale_factor"],
                is_mobile=vp["is_mobile"],
                user_agent=user_agent,
            )
            page = context.new_page()

            try:
                page.goto(url, wait_until="networkidle", timeout=30000)
            except PlaywrightError:
                # Fallback: wait for load event instead. Playwright's
                # TimeoutError is a subclass of Error, so a page that never
                # goes network-idle lands here too.
                page.goto(url, wait_until="load", timeout=30000)

            # Wait for dynamic content
            page.wait_for_timeout(wait_ms)

            # Capture screenshot
            page.screenshot(path=output, full_page=full_page)

            # Gather above-the-fold metrics
            metrics = page.evaluate(_ABOVE_FOLD_METRICS_JS)
        finally:
            # Close the browser even when navigation, capture or evaluation
            # raised, so a failed run does not leave it open.
            browser.close()

    return metrics
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
# ── CLI ────────────────────────────────────────────────────────────
|
|
155
|
-
|
|
156
|
-
def main(argv=None):
    """Command-line entry point: capture a screenshot and report its metrics.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is what a plain ``main()``
            call has always done.

    The "Screenshot saved" notice is written to stderr. The report itself goes
    to stdout: JSON when ``--json`` is given, otherwise a text summary.
    Exits with status 1 when the capture raises.
    """
    import json  # function-local, as before; only this entry point needs it

    parser = argparse.ArgumentParser(
        description="SEO Screenshot — Viewport capture (BMAD+ SEO Engine)"
    )
    parser.add_argument("url", help="URL to capture")
    parser.add_argument("--output", "-o", default="screenshot.png", help="Output file path")
    parser.add_argument(
        "--viewport", "-v",
        choices=list(VIEWPORTS.keys()), default="desktop",
        help="Viewport preset"
    )
    parser.add_argument("--full", action="store_true", help="Capture full page (not just viewport)")
    parser.add_argument("--wait", "-w", type=int, default=2000, help="Wait after load (ms)")
    parser.add_argument("--json", "-j", action="store_true", help="Output metrics as JSON")

    args = parser.parse_args(argv)

    try:
        metrics = capture_screenshot(
            url=args.url,
            output=args.output,
            viewport=args.viewport,
            full_page=args.full,
            wait_ms=args.wait,
        )
    except Exception as exc:
        # CLI boundary: report the failure in one line, matching the
        # "Error: ..." + exit(1) style used for the missing-playwright case.
        print(f"Error: could not capture {args.url}: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Screenshot saved: {args.output}", file=sys.stderr)

    if args.json:
        print(json.dumps(metrics, indent=2))
        return

    font_px = metrics['body_font_size_px']
    print(f"\nAbove-the-Fold Analysis ({args.viewport}):")
    print(f" Viewport: {metrics['viewportWidth']}×{metrics['viewportHeight']}")
    print(f" CTAs above fold: {metrics['ctas_above_fold']}")
    for cta in metrics.get("cta_details", []):
        print(f" - \"{cta['text']}\" ({cta['tag']}, top: {cta['top']}px)")
    if metrics.get("largest_image_above_fold"):
        img = metrics["largest_image_above_fold"]
        print(f" Largest image: {img['width']}×{img['height']} at y={img['top']}px")
    print(f" Horizontal scroll: {'⚠️ YES' if metrics['has_horizontal_scroll'] else '✅ No'}")
    print(f" Body font size: {font_px}px {'✅' if font_px >= 16 else '⚠️ <16px'}")
    print(f" DOM elements: {metrics['dom_element_count']:,}")


if __name__ == "__main__":
    main()
|
package/oveanet-pack/seo-audit-360/tests/__pycache__/test_crawl.cpython-314-pytest-9.0.2.pyc
DELETED
|
Binary file
|
package/oveanet-pack/seo-audit-360/tests/__pycache__/test_parse.cpython-314-pytest-9.0.2.pyc
DELETED
|
Binary file
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html>
|
|
2
|
-
<html lang="en">
|
|
3
|
-
<head>
|
|
4
|
-
<meta charset="UTF-8">
|
|
5
|
-
<title>SEO Test Page — BMAD+ Fixture</title>
|
|
6
|
-
<meta name="description" content="A test page for validating the SEO parse module with known elements.">
|
|
7
|
-
<meta name="robots" content="index, follow">
|
|
8
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
9
|
-
<meta property="og:title" content="SEO Test Page">
|
|
10
|
-
<meta property="og:type" content="website">
|
|
11
|
-
<meta property="og:url" content="https://example.com/test">
|
|
12
|
-
<meta name="twitter:card" content="summary_large_image">
|
|
13
|
-
<meta name="twitter:title" content="SEO Test Page">
|
|
14
|
-
<link rel="canonical" href="https://example.com/test">
|
|
15
|
-
<link rel="alternate" hreflang="en" href="https://example.com/en/test">
|
|
16
|
-
<link rel="alternate" hreflang="fr" href="https://example.com/fr/test">
|
|
17
|
-
<link rel="alternate" hreflang="x-default" href="https://example.com/test">
|
|
18
|
-
</head>
|
|
19
|
-
<body>
|
|
20
|
-
<h1>Main Heading of the Page</h1>
|
|
21
|
-
<p>This is a test paragraph with enough words to verify word count functionality in the parser module. We need at least a few sentences to make the test meaningful and realistic.</p>
|
|
22
|
-
|
|
23
|
-
<h2>Second Level Heading One</h2>
|
|
24
|
-
<p>Content under the first H2. This paragraph adds more text to increase the word count.</p>
|
|
25
|
-
|
|
26
|
-
<h2>Second Level Heading Two</h2>
|
|
27
|
-
<p>Another section with different content about SEO analysis and testing.</p>
|
|
28
|
-
|
|
29
|
-
<h3>Third Level Heading</h3>
|
|
30
|
-
<p>Detailed information under the H3 heading for testing hierarchy detection.</p>
|
|
31
|
-
|
|
32
|
-
<img src="/images/hero.jpg" alt="Hero image for testing" width="800" height="400" loading="lazy">
|
|
33
|
-
<img src="/images/no-alt.jpg" width="200" height="200">
|
|
34
|
-
<img src="/images/empty-alt.jpg" alt="" width="100" height="100">
|
|
35
|
-
|
|
36
|
-
<a href="https://example.com/about">About Us</a>
|
|
37
|
-
<a href="https://example.com/services">Our Services</a>
|
|
38
|
-
<a href="https://external.com/partner" rel="nofollow" target="_blank">Partner Link</a>
|
|
39
|
-
<a href="/relative-link">Relative Link</a>
|
|
40
|
-
|
|
41
|
-
<script type="application/ld+json">
|
|
42
|
-
{
|
|
43
|
-
"@context": "https://schema.org",
|
|
44
|
-
"@type": "Organization",
|
|
45
|
-
"name": "Test Company",
|
|
46
|
-
"url": "https://example.com",
|
|
47
|
-
"logo": "https://example.com/logo.png"
|
|
48
|
-
}
|
|
49
|
-
</script>
|
|
50
|
-
|
|
51
|
-
<script type="application/ld+json">
|
|
52
|
-
{
|
|
53
|
-
"@context": "https://schema.org",
|
|
54
|
-
"@type": "BreadcrumbList",
|
|
55
|
-
"itemListElement": [
|
|
56
|
-
{"@type": "ListItem", "position": 1, "name": "Home", "item": "https://example.com"},
|
|
57
|
-
{"@type": "ListItem", "position": 2, "name": "Test", "item": "https://example.com/test"}
|
|
58
|
-
]
|
|
59
|
-
}
|
|
60
|
-
</script>
|
|
61
|
-
</body>
|
|
62
|
-
</html>
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tests for seo_apis.py — API response parsing and error handling.
|
|
3
|
-
|
|
4
|
-
Author: Laurent Rochetta
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
-
|
|
11
|
-
# Temporarily unset API key for error tests
|
|
12
|
-
original_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class TestAPIKeyMissing:
    """Test behavior when GOOGLE_API_KEY is not set."""

    def setup_method(self):
        import importlib

        # Take the variable out of the environment and remember its value.
        # pop() returns None when it was unset, which keeps "unset" distinct
        # from "set to an empty string" for the restore in teardown.
        self._saved_key = os.environ.pop("GOOGLE_API_KEY", None)

        # Reimport to pick up empty key
        import seo_apis
        importlib.reload(seo_apis)
        self.seo_apis = seo_apis

    def teardown_method(self):
        # Put back exactly what setup removed; leave it unset if it was unset.
        if self._saved_key is not None:
            os.environ["GOOGLE_API_KEY"] = self._saved_key

    def test_pagespeed_without_key(self):
        # Force the module to use an empty key
        self.seo_apis.API_KEY = ""
        result = self.seo_apis.run_pagespeed("https://example.com")
        assert result.get("error") is not None
        assert "GOOGLE_API_KEY" in result["error"]

    def test_crux_without_key(self):
        self.seo_apis.API_KEY = ""
        result = self.seo_apis.run_crux("https://example.com")
        assert result.get("error") is not None

    def test_rich_results_without_key(self):
        self.seo_apis.API_KEY = ""
        result = self.seo_apis.run_rich_results_test("https://example.com")
        assert result.get("error") is not None
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class TestResultStructure:
    """Check the keys present in the dicts returned by the seo_apis runners."""

    def setup_method(self):
        import importlib
        import seo_apis

        # Reload for a fresh module state, then blank the key once here so
        # every test below runs with API_KEY == "".
        reloaded = importlib.reload(seo_apis)
        reloaded.API_KEY = ""
        self.seo_apis = reloaded

    def test_pagespeed_result_keys(self):
        outcome = self.seo_apis.run_pagespeed("https://example.com")
        # Even on error, should have expected structure
        assert "error" in outcome

    def test_crux_result_keys(self):
        outcome = self.seo_apis.run_crux("https://example.com")
        assert "error" in outcome

    def test_run_all_structure(self):
        outcome = self.seo_apis.run_all("https://example.com")
        for expected in ("pagespeed", "crux", "mobile_friendly", "url", "timestamp"):
            assert expected in outcome
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tests for seo_crawl.py — URL normalization, link extraction, depth limiting.
|
|
3
|
-
|
|
4
|
-
Author: Laurent Rochetta
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
-
|
|
11
|
-
from seo_crawl import SEOCrawler
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class TestURLNormalization:
    """Exercise SEOCrawler.normalize_url, which is used for deduplication."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_strips_trailing_slash(self):
        normalized = self.crawler.normalize_url("https://example.com/page/")
        assert normalized == "https://example.com/page"

    def test_preserves_root(self):
        normalized = self.crawler.normalize_url("https://example.com/")
        assert normalized == "https://example.com/"

    def test_normalizes_scheme(self):
        # NOTE(review): the input is already https, so this only checks that
        # the scheme survives normalization.
        normalized = self.crawler.normalize_url("https://example.com/page")
        assert normalized.startswith("https://")

    def test_deduplicates(self):
        with_slash = self.crawler.normalize_url("https://example.com/page/")
        without_slash = self.crawler.normalize_url("https://example.com/page")
        assert with_slash == without_slash
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class TestInternalDetection:
    """Exercise SEOCrawler.is_internal for a crawler rooted at example.com."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def test_internal_url(self):
        verdict = self.crawler.is_internal("https://example.com/about")
        assert verdict is True

    def test_external_url(self):
        verdict = self.crawler.is_internal("https://other.com/page")
        assert verdict is False

    def test_subdomain_is_external(self):
        # A subdomain of the base host is expected to count as external.
        verdict = self.crawler.is_internal("https://blog.example.com/post")
        assert verdict is False
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class TestLinkExtraction:
    """Exercise SEOCrawler.extract_links against small HTML snippets."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)

    def _links_in(self, markup):
        # Shared call: every test resolves links against the site root.
        # The leading underscore keeps pytest from collecting this as a test.
        return self.crawler.extract_links(markup, "https://example.com/")

    def test_extracts_internal_links(self):
        markup = (
            '\n'
            '<a href="/about">About</a>\n'
            '<a href="https://example.com/contact">Contact</a>\n'
        )
        assert len(self._links_in(markup)) == 2

    def test_ignores_external_links(self):
        found = self._links_in('<a href="https://other.com/page">External</a>')
        assert len(found) == 0

    def test_ignores_anchors(self):
        found = self._links_in('<a href="#section">Anchor</a>')
        assert len(found) == 0

    def test_ignores_javascript(self):
        found = self._links_in('<a href="javascript:void(0)">JS Link</a>')
        assert len(found) == 0

    def test_ignores_mailto(self):
        found = self._links_in('<a href="mailto:test@example.com">Email</a>')
        assert len(found) == 0
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
class TestTitleExtraction:
    """Exercise SEOCrawler.extract_title with and without a <title> tag."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_extracts_title(self):
        document = "<html><head><title>Test Page</title></head><body></body></html>"
        title = self.crawler.extract_title(document)
        assert title == "Test Page"

    def test_missing_title(self):
        document = "<html><body></body></html>"
        title = self.crawler.extract_title(document)
        assert title is None
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class TestWordCount:
    """Exercise SEOCrawler.count_words on visible text and script content."""

    def setup_method(self):
        self.crawler = SEOCrawler("https://example.com")

    def test_counts_visible_words(self):
        document = "<html><body><p>This is a test with seven words.</p></body></html>"
        total = self.crawler.count_words(document)
        assert total == 7

    def test_excludes_script_content(self):
        document = '<html><body><p>Visible</p><script>var hidden = true;</script></body></html>'
        total = self.crawler.count_words(document)
        # Only "Visible" counts; the script body must not contribute.
        assert total == 1
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
class TestCrawlerConfig:
    """Check that constructor arguments show up as SEOCrawler attributes."""

    def test_max_pages_respected(self):
        limited = SEOCrawler("https://example.com", max_pages=5)
        assert limited.max_pages == 5

    def test_max_depth_respected(self):
        shallow = SEOCrawler("https://example.com", max_depth=1)
        assert shallow.max_depth == 1

    def test_base_domain_extracted(self):
        # The "www." prefix is expected to be kept in base_domain.
        rooted = SEOCrawler("https://www.example.com/page")
        assert rooted.base_domain == "www.example.com"
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tests for seo_fetch.py — SSRF protection, URL handling, error cases.
|
|
3
|
-
|
|
4
|
-
Author: Laurent Rochetta
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|
10
|
-
|
|
11
|
-
from seo_fetch import is_safe_url, fetch_page
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class TestSSRFProtection:
    """Check that is_safe_url rejects private/loopback/reserved targets."""

    def test_blocks_localhost(self):
        verdict = is_safe_url("http://127.0.0.1/admin")
        assert verdict is False

    def test_blocks_private_10(self):
        verdict = is_safe_url("http://10.0.0.1/secret")
        assert verdict is False

    def test_blocks_private_192(self):
        verdict = is_safe_url("http://192.168.1.1/")
        assert verdict is False

    def test_blocks_private_172(self):
        verdict = is_safe_url("http://172.16.0.1/")
        assert verdict is False

    def test_allows_public_ip(self):
        verdict = is_safe_url("https://93.184.216.34/")
        assert verdict is True

    def test_allows_public_domain(self):
        # NOTE(review): presumably needs DNS resolution for example.com —
        # confirm against is_safe_url.
        verdict = is_safe_url("https://example.com/")
        assert verdict is True

    def test_blocks_empty_hostname(self):
        verdict = is_safe_url("http:///nohost")
        assert verdict is False

    def test_blocks_zero_ip(self):
        verdict = is_safe_url("http://0.0.0.0/")
        assert verdict is False
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class TestFetchPage:
    """Check fetch_page result dicts for normal, rejected and failing URLs."""

    def test_normalizes_url_without_scheme(self):
        outcome = fetch_page("example.com", timeout=5)
        # The result echoes the URL exactly as the caller supplied it.
        assert outcome["url"] == "example.com"
        # Should have attempted https://example.com
        # NOTE(review): nothing here asserts that attempt — consider checking
        # final_url once its contract is confirmed.

    def test_blocks_invalid_scheme(self):
        outcome = fetch_page("ftp://example.com/file")
        message = outcome["error"]
        assert message is not None
        assert "Invalid URL scheme" in message

    def test_blocks_ssrf(self):
        outcome = fetch_page("http://127.0.0.1/admin")
        message = outcome["error"]
        assert message is not None
        assert "Blocked" in message

    def test_result_structure(self):
        """Verify the result dict has all expected keys."""
        outcome = fetch_page("https://example.com", timeout=5)
        expected_keys = {
            "url",
            "final_url",
            "status_code",
            "content",
            "headers",
            "redirect_chain",
            "content_length",
            "response_time_ms",
            "error",
        }
        assert expected_keys == set(outcome.keys())

    def test_timeout_returns_error(self):
        # Use a non-routable IP to force timeout
        # NOTE(review): 192.0.2.1 is in TEST-NET-1 (RFC 5737); if is_safe_url
        # rejects that range, the error comes from the SSRF guard rather than
        # a timeout — confirm.
        outcome = fetch_page("http://192.0.2.1/", timeout=1)
        assert outcome["error"] is not None
|