bmad-plus 0.4.0 → 0.4.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in those public registries.
Files changed (68)
  1. package/CHANGELOG.md +29 -0
  2. package/README.md +13 -56
  3. package/osint-agent-package/skills/bmad-osint-investigate/osint/SKILL.md +452 -452
  4. package/osint-agent-package/skills/bmad-osint-investigate/osint/assets/dossier-template.md +116 -116
  5. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/content-extraction.md +100 -100
  6. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/platforms.md +130 -130
  7. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/psychoprofile.md +69 -69
  8. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/tools.md +281 -281
  9. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/mcp-client.py +136 -136
  10. package/package.json +1 -1
  11. package/readme-international/README.de.md +1 -1
  12. package/readme-international/README.es.md +1 -1
  13. package/readme-international/README.fr.md +1 -1
  14. package/tools/cli/commands/install.js +88 -59
  15. package/tools/cli/i18n.js +501 -0
  16. package/oveanet-pack/animated-website/DEPLOYMENT.md +0 -104
  17. package/oveanet-pack/animated-website/README.md +0 -63
  18. package/oveanet-pack/animated-website/agent.yaml +0 -63
  19. package/oveanet-pack/seo-audit-360/DEPLOYMENT.md +0 -115
  20. package/oveanet-pack/seo-audit-360/README.md +0 -66
  21. package/oveanet-pack/seo-audit-360/agent.yaml +0 -70
  22. package/oveanet-pack/seo-audit-360/extensions/google-analytics/EXTENSION.md +0 -79
  23. package/oveanet-pack/seo-audit-360/extensions/google-analytics/ga4_client.py +0 -200
  24. package/oveanet-pack/seo-audit-360/extensions/google-analytics/requirements.txt +0 -4
  25. package/oveanet-pack/seo-audit-360/extensions/google-search-console/EXTENSION.md +0 -109
  26. package/oveanet-pack/seo-audit-360/extensions/google-search-console/gsc_client.py +0 -186
  27. package/oveanet-pack/seo-audit-360/extensions/google-search-console/requirements.txt +0 -4
  28. package/oveanet-pack/seo-audit-360/hooks/seo-check.sh +0 -95
  29. package/oveanet-pack/seo-audit-360/requirements.txt +0 -14
  30. package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_crawl.cpython-314.pyc +0 -0
  31. package/oveanet-pack/seo-audit-360/scripts/__pycache__/seo_parse.cpython-314.pyc +0 -0
  32. package/oveanet-pack/seo-audit-360/scripts/install.ps1 +0 -53
  33. package/oveanet-pack/seo-audit-360/scripts/install.sh +0 -48
  34. package/oveanet-pack/seo-audit-360/scripts/seo_apis.py +0 -464
  35. package/oveanet-pack/seo-audit-360/scripts/seo_crawl.py +0 -282
  36. package/oveanet-pack/seo-audit-360/scripts/seo_fetch.py +0 -231
  37. package/oveanet-pack/seo-audit-360/scripts/seo_parse.py +0 -255
  38. package/oveanet-pack/seo-audit-360/scripts/seo_report.py +0 -403
  39. package/oveanet-pack/seo-audit-360/scripts/seo_screenshot.py +0 -202
  40. package/oveanet-pack/seo-audit-360/tests/__pycache__/test_crawl.cpython-314-pytest-9.0.2.pyc +0 -0
  41. package/oveanet-pack/seo-audit-360/tests/__pycache__/test_parse.cpython-314-pytest-9.0.2.pyc +0 -0
  42. package/oveanet-pack/seo-audit-360/tests/fixtures/sample_page.html +0 -62
  43. package/oveanet-pack/seo-audit-360/tests/test_apis.py +0 -75
  44. package/oveanet-pack/seo-audit-360/tests/test_crawl.py +0 -121
  45. package/oveanet-pack/seo-audit-360/tests/test_fetch.py +0 -70
  46. package/oveanet-pack/seo-audit-360/tests/test_parse.py +0 -184
  47. package/oveanet-pack/universal-backup/DEPLOYMENT.md +0 -80
  48. package/oveanet-pack/universal-backup/README.md +0 -58
  49. package/oveanet-pack/universal-backup/agent.yaml +0 -45
  50. package/{oveanet-pack/animated-website/agent → src/bmad-plus/agents/pack-animated}/animated-website-agent.md +0 -0
  51. package/{oveanet-pack/animated-website → src/bmad-plus/agents/pack-animated}/templates/animated-website-workflow.md +0 -0
  52. package/{oveanet-pack/universal-backup/agent → src/bmad-plus/agents/pack-backup}/backup-agent.md +0 -0
  53. package/{oveanet-pack/universal-backup → src/bmad-plus/agents/pack-backup}/templates/backup-workflow.md +0 -0
  54. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/SKILL.md +0 -0
  55. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/checklist.md +0 -0
  56. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/pagespeed-playbook.md +0 -0
  57. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/audit-schema.json +0 -0
  58. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/cwv-thresholds.md +0 -0
  59. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/eeat-criteria.md +0 -0
  60. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/geo-signals.md +0 -0
  61. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/hreflang-rules.md +0 -0
  62. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/quality-gates.md +0 -0
  63. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/schema-catalog.md +0 -0
  64. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/ref/schema-templates.json +0 -0
  65. package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-chief.md +0 -0
  66. package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-judge.md +0 -0
  67. package/{oveanet-pack/seo-audit-360/agent → src/bmad-plus/agents/pack-seo}/seo-scout.md +0 -0
  68. package/{oveanet-pack/seo-audit-360 → src/bmad-plus/agents/pack-seo}/templates/seo-audit-workflow.md +0 -0
package/oveanet-pack/seo-audit-360/scripts/seo_screenshot.py
@@ -1,202 +0,0 @@
- #!/usr/bin/env python3
- """
- SEO Screenshot — Viewport screenshot capture for visual SEO analysis.
-
- Features:
- - Mobile and desktop viewport presets
- - Above-the-fold element detection
- - Full-page capture option
- - PNG output with configurable quality
-
- Requires: playwright (pip install playwright && playwright install chromium)
-
- Author: Laurent Rochetta
- License: MIT
- """
-
- import argparse
- import sys
-
-
- VIEWPORTS = {
-     "mobile": {"width": 375, "height": 812, "device_scale_factor": 3, "is_mobile": True},
-     "tablet": {"width": 768, "height": 1024, "device_scale_factor": 2, "is_mobile": True},
-     "desktop": {"width": 1440, "height": 900, "device_scale_factor": 1, "is_mobile": False},
-     "desktop-hd": {"width": 1920, "height": 1080, "device_scale_factor": 1, "is_mobile": False},
- }
-
-
- def capture_screenshot(
-     url: str,
-     output: str = "screenshot.png",
-     viewport: str = "desktop",
-     full_page: bool = False,
-     wait_ms: int = 2000,
- ):
-     """
-     Capture a viewport screenshot of a URL using Playwright.
-
-     Args:
-         url: URL to capture
-         output: Output file path (.png)
-         viewport: Viewport preset (mobile, tablet, desktop, desktop-hd)
-         full_page: Capture full page scroll or just viewport
-         wait_ms: Wait time after page load (ms)
-     """
-     try:
-         from playwright.sync_api import sync_playwright
-     except ImportError:
-         print(
-             "Error: playwright required.\n"
-             "Install: pip install playwright && playwright install chromium",
-             file=sys.stderr,
-         )
-         sys.exit(1)
-
-     vp = VIEWPORTS.get(viewport, VIEWPORTS["desktop"])
-
-     with sync_playwright() as p:
-         browser = p.chromium.launch(headless=True)
-         context = browser.new_context(
-             viewport={"width": vp["width"], "height": vp["height"]},
-             device_scale_factor=vp["device_scale_factor"],
-             is_mobile=vp["is_mobile"],
-             user_agent=(
-                 "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
-                 "AppleWebKit/605.1.15 Mobile/15E148 Safari/604.1"
-                 if vp["is_mobile"]
-                 else "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
-                 "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 BMADSEOEngine/2.0"
-             ),
-         )
-
-         page = context.new_page()
-
-         try:
-             page.goto(url, wait_until="networkidle", timeout=30000)
-         except Exception:
-             # Fallback: wait for load event instead
-             page.goto(url, wait_until="load", timeout=30000)
-
-         # Wait for dynamic content
-         page.wait_for_timeout(wait_ms)
-
-         # Capture screenshot
-         page.screenshot(path=output, full_page=full_page)
-
-         # Gather above-the-fold metrics
-         metrics = page.evaluate("""() => {
-             const viewportHeight = window.innerHeight;
-             const viewportWidth = window.innerWidth;
-
-             // Find CTAs above the fold
-             const ctas = [];
-             const buttons = document.querySelectorAll('a, button, [role="button"]');
-             buttons.forEach(el => {
-                 const rect = el.getBoundingClientRect();
-                 if (rect.top < viewportHeight && rect.bottom > 0) {
-                     const text = el.textContent.trim().substring(0, 50);
-                     if (text && (
-                         /sign.?up|get.?start|try|buy|contact|demo|free|download|subscribe/i.test(text)
-                     )) {
-                         ctas.push({
-                             text: text,
-                             tag: el.tagName,
-                             top: Math.round(rect.top),
-                             visible: rect.width > 0 && rect.height > 0,
-                         });
-                     }
-                 }
-             });
-
-             // Find hero/LCP candidate
-             const images = document.querySelectorAll('img');
-             let largestImage = null;
-             let largestArea = 0;
-             images.forEach(img => {
-                 const rect = img.getBoundingClientRect();
-                 const area = rect.width * rect.height;
-                 if (area > largestArea && rect.top < viewportHeight) {
-                     largestArea = area;
-                     largestImage = {
-                         src: img.src.substring(0, 100),
-                         width: Math.round(rect.width),
-                         height: Math.round(rect.height),
-                         top: Math.round(rect.top),
-                     };
-                 }
-             });
-
-             // Check for horizontal scroll
-             const hasHorizontalScroll = document.documentElement.scrollWidth > viewportWidth;
-
-             // Font size check
-             const body = document.body;
-             const bodyFontSize = body ? parseFloat(getComputedStyle(body).fontSize) : 16;
-
-             return {
-                 viewportWidth,
-                 viewportHeight,
-                 ctas_above_fold: ctas.length,
-                 cta_details: ctas.slice(0, 5),
-                 largest_image_above_fold: largestImage,
-                 has_horizontal_scroll: hasHorizontalScroll,
-                 body_font_size_px: bodyFontSize,
-                 dom_element_count: document.querySelectorAll('*').length,
-             };
-         }""")
-
-         browser.close()
-
-     return metrics
-
-
- # ── CLI ────────────────────────────────────────────────────────────
-
- def main():
-     parser = argparse.ArgumentParser(
-         description="SEO Screenshot — Viewport capture (BMAD+ SEO Engine)"
-     )
-     parser.add_argument("url", help="URL to capture")
-     parser.add_argument("--output", "-o", default="screenshot.png", help="Output file path")
-     parser.add_argument(
-         "--viewport", "-v",
-         choices=list(VIEWPORTS.keys()), default="desktop",
-         help="Viewport preset"
-     )
-     parser.add_argument("--full", action="store_true", help="Capture full page (not just viewport)")
-     parser.add_argument("--wait", "-w", type=int, default=2000, help="Wait after load (ms)")
-     parser.add_argument("--json", "-j", action="store_true", help="Output metrics as JSON")
-
-     args = parser.parse_args()
-
-     import json
-
-     metrics = capture_screenshot(
-         url=args.url,
-         output=args.output,
-         viewport=args.viewport,
-         full_page=args.full,
-         wait_ms=args.wait,
-     )
-
-     print(f"Screenshot saved: {args.output}", file=sys.stderr)
-
-     if args.json:
-         print(json.dumps(metrics, indent=2))
-     else:
-         print(f"\nAbove-the-Fold Analysis ({args.viewport}):")
-         print(f" Viewport: {metrics['viewportWidth']}×{metrics['viewportHeight']}")
-         print(f" CTAs above fold: {metrics['ctas_above_fold']}")
-         for cta in metrics.get("cta_details", []):
-             print(f" - \"{cta['text']}\" ({cta['tag']}, top: {cta['top']}px)")
-         if metrics.get("largest_image_above_fold"):
-             img = metrics["largest_image_above_fold"]
-             print(f" Largest image: {img['width']}×{img['height']} at y={img['top']}px")
-         print(f" Horizontal scroll: {'⚠️ YES' if metrics['has_horizontal_scroll'] else '✅ No'}")
-         print(f" Body font size: {metrics['body_font_size_px']}px {'✅' if metrics['body_font_size_px'] >= 16 else '⚠️ <16px'}")
-         print(f" DOM elements: {metrics['dom_element_count']:,}")
-
-
- if __name__ == "__main__":
-     main()
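
For context, the removed script could be driven either from the command line or as an importable function. A minimal usage sketch based on the signatures above; the URL and output path are illustrative:

# Sketch only — mirrors the capture_screenshot() signature from the deleted module.
from seo_screenshot import capture_screenshot

metrics = capture_screenshot(
    "https://example.com",      # illustrative URL
    output="home-mobile.png",   # PNG written to this path
    viewport="mobile",          # one of: mobile, tablet, desktop, desktop-hd
    full_page=False,            # viewport-only capture
    wait_ms=2000,               # settle time for dynamic content
)
print(metrics["ctas_above_fold"], metrics["has_horizontal_scroll"])

The CLI equivalent, per the argparse definitions, would have been: python seo_screenshot.py https://example.com -v mobile -o home-mobile.png --json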
package/oveanet-pack/seo-audit-360/tests/fixtures/sample_page.html
@@ -1,62 +0,0 @@
- <!DOCTYPE html>
- <html lang="en">
- <head>
-   <meta charset="UTF-8">
-   <title>SEO Test Page — BMAD+ Fixture</title>
-   <meta name="description" content="A test page for validating the SEO parse module with known elements.">
-   <meta name="robots" content="index, follow">
-   <meta name="viewport" content="width=device-width, initial-scale=1.0">
-   <meta property="og:title" content="SEO Test Page">
-   <meta property="og:type" content="website">
-   <meta property="og:url" content="https://example.com/test">
-   <meta name="twitter:card" content="summary_large_image">
-   <meta name="twitter:title" content="SEO Test Page">
-   <link rel="canonical" href="https://example.com/test">
-   <link rel="alternate" hreflang="en" href="https://example.com/en/test">
-   <link rel="alternate" hreflang="fr" href="https://example.com/fr/test">
-   <link rel="alternate" hreflang="x-default" href="https://example.com/test">
- </head>
- <body>
-   <h1>Main Heading of the Page</h1>
-   <p>This is a test paragraph with enough words to verify word count functionality in the parser module. We need at least a few sentences to make the test meaningful and realistic.</p>
-
-   <h2>Second Level Heading One</h2>
-   <p>Content under the first H2. This paragraph adds more text to increase the word count.</p>
-
-   <h2>Second Level Heading Two</h2>
-   <p>Another section with different content about SEO analysis and testing.</p>
-
-   <h3>Third Level Heading</h3>
-   <p>Detailed information under the H3 heading for testing hierarchy detection.</p>
-
-   <img src="/images/hero.jpg" alt="Hero image for testing" width="800" height="400" loading="lazy">
-   <img src="/images/no-alt.jpg" width="200" height="200">
-   <img src="/images/empty-alt.jpg" alt="" width="100" height="100">
-
-   <a href="https://example.com/about">About Us</a>
-   <a href="https://example.com/services">Our Services</a>
-   <a href="https://external.com/partner" rel="nofollow" target="_blank">Partner Link</a>
-   <a href="/relative-link">Relative Link</a>
-
-   <script type="application/ld+json">
-   {
-     "@context": "https://schema.org",
-     "@type": "Organization",
-     "name": "Test Company",
-     "url": "https://example.com",
-     "logo": "https://example.com/logo.png"
-   }
-   </script>
-
-   <script type="application/ld+json">
-   {
-     "@context": "https://schema.org",
-     "@type": "BreadcrumbList",
-     "itemListElement": [
-       {"@type": "ListItem", "position": 1, "name": "Home", "item": "https://example.com"},
-       {"@type": "ListItem", "position": 2, "name": "Test", "item": "https://example.com/test"}
-     ]
-   }
-   </script>
- </body>
- </html>
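
The fixture above deliberately packs in one of each element the parser is exercised against: title, meta description and robots, Open Graph and Twitter tags, canonical and hreflang links, an H1/H2/H3 hierarchy, images with present, missing, and empty alt text, internal/external/nofollow links, and two JSON-LD blocks. A sketch of how a test might consume it — BeautifulSoup is an assumption here; the deleted seo_parse module may use a different parser:

import json
from pathlib import Path
from bs4 import BeautifulSoup  # assumed dependency; not confirmed by this diff

html = Path("tests/fixtures/sample_page.html").read_text(encoding="utf-8")
soup = BeautifulSoup(html, "html.parser")

assert soup.title.string.startswith("SEO Test Page")
# Exactly one image omits the alt attribute entirely (empty alt="" is a separate case)
assert sum(1 for img in soup.find_all("img") if img.get("alt") is None) == 1
# Both structured-data blocks must parse as JSON
types = {json.loads(s.string)["@type"]
         for s in soup.find_all("script", type="application/ld+json")}
assert types == {"Organization", "BreadcrumbList"}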
package/oveanet-pack/seo-audit-360/tests/test_apis.py
@@ -1,75 +0,0 @@
- """
- Tests for seo_apis.py — API response parsing and error handling.
-
- Author: Laurent Rochetta
- """
-
- import sys
- import os
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
-
- # Temporarily unset API key for error tests
- original_key = os.environ.get("GOOGLE_API_KEY", "")
-
-
- class TestAPIKeyMissing:
-     """Test behavior when GOOGLE_API_KEY is not set."""
-
-     def setup_method(self):
-         os.environ.pop("GOOGLE_API_KEY", None)
-         # Reimport to pick up empty key
-         import importlib
-         import seo_apis
-         importlib.reload(seo_apis)
-         self.seo_apis = seo_apis
-
-     def teardown_method(self):
-         if original_key:
-             os.environ["GOOGLE_API_KEY"] = original_key
-
-     def test_pagespeed_without_key(self):
-         # Force the module to use an empty key
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_pagespeed("https://example.com")
-         assert result.get("error") is not None
-         assert "GOOGLE_API_KEY" in result["error"]
-
-     def test_crux_without_key(self):
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_crux("https://example.com")
-         assert result.get("error") is not None
-
-     def test_rich_results_without_key(self):
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_rich_results_test("https://example.com")
-         assert result.get("error") is not None
-
-
- class TestResultStructure:
-     """Test that API functions return expected structures."""
-
-     def setup_method(self):
-         import importlib
-         import seo_apis
-         importlib.reload(seo_apis)
-         self.seo_apis = seo_apis
-
-     def test_pagespeed_result_keys(self):
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_pagespeed("https://example.com")
-         # Even on error, should have expected structure
-         assert "error" in result
-
-     def test_crux_result_keys(self):
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_crux("https://example.com")
-         assert "error" in result
-
-     def test_run_all_structure(self):
-         self.seo_apis.API_KEY = ""
-         result = self.seo_apis.run_all("https://example.com")
-         assert "pagespeed" in result
-         assert "crux" in result
-         assert "mobile_friendly" in result
-         assert "url" in result
-         assert "timestamp" in result
package/oveanet-pack/seo-audit-360/tests/test_crawl.py
@@ -1,121 +0,0 @@
- """
- Tests for seo_crawl.py — URL normalization, link extraction, depth limiting.
-
- Author: Laurent Rochetta
- """
-
- import sys
- import os
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
-
- from seo_crawl import SEOCrawler
-
-
- class TestURLNormalization:
-     """Test URL normalization for deduplication."""
-
-     def setup_method(self):
-         self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
-
-     def test_strips_trailing_slash(self):
-         assert self.crawler.normalize_url("https://example.com/page/") == "https://example.com/page"
-
-     def test_preserves_root(self):
-         assert self.crawler.normalize_url("https://example.com/") == "https://example.com/"
-
-     def test_normalizes_scheme(self):
-         result = self.crawler.normalize_url("https://example.com/page")
-         assert result.startswith("https://")
-
-     def test_deduplicates(self):
-         url1 = self.crawler.normalize_url("https://example.com/page/")
-         url2 = self.crawler.normalize_url("https://example.com/page")
-         assert url1 == url2
-
-
- class TestInternalDetection:
-     def setup_method(self):
-         self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
-
-     def test_internal_url(self):
-         assert self.crawler.is_internal("https://example.com/about") is True
-
-     def test_external_url(self):
-         assert self.crawler.is_internal("https://other.com/page") is False
-
-     def test_subdomain_is_external(self):
-         assert self.crawler.is_internal("https://blog.example.com/post") is False
-
-
- class TestLinkExtraction:
-     def setup_method(self):
-         self.crawler = SEOCrawler("https://example.com", max_depth=2, max_pages=25)
-
-     def test_extracts_internal_links(self):
-         html = '''
-         <a href="/about">About</a>
-         <a href="https://example.com/contact">Contact</a>
-         '''
-         links = self.crawler.extract_links(html, "https://example.com/")
-         assert len(links) == 2
-
-     def test_ignores_external_links(self):
-         html = '<a href="https://other.com/page">External</a>'
-         links = self.crawler.extract_links(html, "https://example.com/")
-         assert len(links) == 0
-
-     def test_ignores_anchors(self):
-         html = '<a href="#section">Anchor</a>'
-         links = self.crawler.extract_links(html, "https://example.com/")
-         assert len(links) == 0
-
-     def test_ignores_javascript(self):
-         html = '<a href="javascript:void(0)">JS Link</a>'
-         links = self.crawler.extract_links(html, "https://example.com/")
-         assert len(links) == 0
-
-     def test_ignores_mailto(self):
-         html = '<a href="mailto:test@example.com">Email</a>'
-         links = self.crawler.extract_links(html, "https://example.com/")
-         assert len(links) == 0
-
-
- class TestTitleExtraction:
-     def setup_method(self):
-         self.crawler = SEOCrawler("https://example.com")
-
-     def test_extracts_title(self):
-         html = "<html><head><title>Test Page</title></head><body></body></html>"
-         assert self.crawler.extract_title(html) == "Test Page"
-
-     def test_missing_title(self):
-         html = "<html><body></body></html>"
-         assert self.crawler.extract_title(html) is None
-
-
- class TestWordCount:
-     def setup_method(self):
-         self.crawler = SEOCrawler("https://example.com")
-
-     def test_counts_visible_words(self):
-         html = "<html><body><p>This is a test with seven words.</p></body></html>"
-         assert self.crawler.count_words(html) == 7
-
-     def test_excludes_script_content(self):
-         html = '<html><body><p>Visible</p><script>var hidden = true;</script></body></html>'
-         count = self.crawler.count_words(html)
-         assert count == 1  # Only "Visible"
-
-
- class TestCrawlerConfig:
-     def test_max_pages_respected(self):
-         crawler = SEOCrawler("https://example.com", max_pages=5)
-         assert crawler.max_pages == 5
-
-     def test_max_depth_respected(self):
-         crawler = SEOCrawler("https://example.com", max_depth=1)
-         assert crawler.max_depth == 1
-
-     def test_base_domain_extracted(self):
-         crawler = SEOCrawler("https://www.example.com/page")
-         assert crawler.base_domain == "www.example.com"
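
TestURLNormalization fixes a precise rule: trailing slashes are stripped so /page/ and /page dedupe to one URL, but the bare root keeps its slash. A standalone sketch that satisfies those four tests — the shipped SEOCrawler method may differ in details such as query or fragment handling:

from urllib.parse import urlparse, urlunparse

def normalize_url(url: str) -> str:
    # Strip the trailing slash for dedup (test_strips_trailing_slash, test_deduplicates),
    # but leave the root path "/" alone (test_preserves_root).
    parts = urlparse(url)
    path = parts.path
    if path.endswith("/") and path != "/":
        path = path.rstrip("/")
    return urlunparse((parts.scheme, parts.netloc, path, parts.params, parts.query, ""))

assert normalize_url("https://example.com/page/") == "https://example.com/page"
assert normalize_url("https://example.com/") == "https://example.com/"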
package/oveanet-pack/seo-audit-360/tests/test_fetch.py
@@ -1,70 +0,0 @@
- """
- Tests for seo_fetch.py — SSRF protection, URL handling, error cases.
-
- Author: Laurent Rochetta
- """
-
- import sys
- import os
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
-
- from seo_fetch import is_safe_url, fetch_page
-
-
- class TestSSRFProtection:
-     """Test SSRF prevention blocks private/loopback/reserved IPs."""
-
-     def test_blocks_localhost(self):
-         assert is_safe_url("http://127.0.0.1/admin") is False
-
-     def test_blocks_private_10(self):
-         assert is_safe_url("http://10.0.0.1/secret") is False
-
-     def test_blocks_private_192(self):
-         assert is_safe_url("http://192.168.1.1/") is False
-
-     def test_blocks_private_172(self):
-         assert is_safe_url("http://172.16.0.1/") is False
-
-     def test_allows_public_ip(self):
-         assert is_safe_url("https://93.184.216.34/") is True
-
-     def test_allows_public_domain(self):
-         assert is_safe_url("https://example.com/") is True
-
-     def test_blocks_empty_hostname(self):
-         assert is_safe_url("http:///nohost") is False
-
-     def test_blocks_zero_ip(self):
-         assert is_safe_url("http://0.0.0.0/") is False
-
-
- class TestFetchPage:
-     """Test fetch_page function behavior."""
-
-     def test_normalizes_url_without_scheme(self):
-         result = fetch_page("example.com", timeout=5)
-         assert result["url"] == "example.com"
-         # Should have attempted https://example.com
-
-     def test_blocks_invalid_scheme(self):
-         result = fetch_page("ftp://example.com/file")
-         assert result["error"] is not None
-         assert "Invalid URL scheme" in result["error"]
-
-     def test_blocks_ssrf(self):
-         result = fetch_page("http://127.0.0.1/admin")
-         assert result["error"] is not None
-         assert "Blocked" in result["error"]
-
-     def test_result_structure(self):
-         """Verify the result dict has all expected keys."""
-         result = fetch_page("https://example.com", timeout=5)
-         expected_keys = {"url", "final_url", "status_code", "content", "headers",
-                          "redirect_chain", "content_length", "response_time_ms", "error"}
-         assert expected_keys == set(result.keys())
-
-     def test_timeout_returns_error(self):
-         # Use a non-routable IP to force timeout
-         result = fetch_page("http://192.0.2.1/", timeout=1)
-         assert result["error"] is not None