firecrawl-4.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
firecrawl/__tests__/unit/v2/methods/test_branding.py
@@ -0,0 +1,214 @@
+import pytest
+from unittest.mock import Mock, MagicMock
+from firecrawl.v2.methods.scrape import scrape
+from firecrawl.v2.types import ScrapeOptions, Document
+
+
+class TestBrandingFormat:
+    """Unit tests for branding format support."""
+
+    def test_scrape_with_branding_format_returns_branding_data(self):
+        """Test that scraping with branding format returns branding data."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "data": {
+                "markdown": "# Example",
+                "branding": {
+                    "colorScheme": "light",
+                    "colors": {
+                        "primary": "#E11D48",
+                        "secondary": "#3B82F6",
+                        "accent": "#F59E0B"
+                    },
+                    "typography": {
+                        "fontFamilies": {
+                            "primary": "Inter",
+                            "heading": "Poppins"
+                        },
+                        "fontSizes": {
+                            "h1": "2.5rem",
+                            "body": "1rem"
+                        }
+                    },
+                    "spacing": {
+                        "baseUnit": 8
+                    },
+                    "components": {
+                        "buttonPrimary": {
+                            "background": "#E11D48",
+                            "textColor": "#FFFFFF",
+                            "borderRadius": "0.5rem"
+                        }
+                    }
+                }
+            }
+        }
+
+        mock_client = Mock()
+        mock_client.post.return_value = mock_response
+
+        result = scrape(mock_client, "https://example.com", ScrapeOptions(formats=["branding"]))
+
+        assert result.branding is not None
+        assert result.branding.color_scheme == "light"
+        assert result.branding.colors["primary"] == "#E11D48"
+        assert result.branding.typography["fontFamilies"]["primary"] == "Inter"
+        assert result.branding.spacing["baseUnit"] == 8
+        assert result.branding.components["buttonPrimary"]["background"] == "#E11D48"
+
+    def test_scrape_with_branding_and_markdown_formats_returns_both(self):
+        """Test that scraping with both branding and markdown formats returns both."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "data": {
+                "markdown": "# Example Content",
+                "branding": {
+                    "colorScheme": "dark",
+                    "colors": {
+                        "primary": "#10B981"
+                    },
+                    "typography": {
+                        "fontFamilies": {
+                            "primary": "Roboto"
+                        }
+                    }
+                }
+            }
+        }
+
+        mock_client = Mock()
+        mock_client.post.return_value = mock_response
+
+        result = scrape(mock_client, "https://example.com", ScrapeOptions(formats=["markdown", "branding"]))
+
+        assert result.markdown == "# Example Content"
+        assert result.branding is not None
+        assert result.branding.color_scheme == "dark"
+        assert result.branding.colors["primary"] == "#10B981"
+
+    def test_scrape_without_branding_format_does_not_return_branding(self):
+        """Test that scraping without branding format does not return branding."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "data": {
+                "markdown": "# Example"
+            }
+        }
+
+        mock_client = Mock()
+        mock_client.post.return_value = mock_response
+
+        result = scrape(mock_client, "https://example.com", ScrapeOptions(formats=["markdown"]))
+
+        assert result.markdown == "# Example"
+        assert result.branding is None
+
+    def test_branding_format_with_all_nested_fields(self):
+        """Test branding format with all nested fields populated."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "data": {
+                "branding": {
+                    "colorScheme": "light",
+                    "logo": "https://example.com/logo.png",
+                    "fonts": [
+                        {"family": "Inter", "weight": 400},
+                        {"family": "Poppins", "weight": 700}
+                    ],
+                    "colors": {
+                        "primary": "#E11D48",
+                        "background": "#FFFFFF"
+                    },
+                    "typography": {
+                        "fontFamilies": {"primary": "Inter"},
+                        "fontStacks": {"body": ["Inter", "sans-serif"]},
+                        "fontSizes": {"h1": "2.5rem"},
+                        "lineHeights": {"body": 1.5},
+                        "fontWeights": {"regular": 400}
+                    },
+                    "spacing": {
+                        "baseUnit": 8,
+                        "padding": {"sm": 8, "md": 16}
+                    },
+                    "components": {
+                        "buttonPrimary": {
+                            "background": "#E11D48",
+                            "textColor": "#FFFFFF"
+                        }
+                    },
+                    "icons": {
+                        "style": "outline",
+                        "primaryColor": "#E11D48"
+                    },
+                    "images": {
+                        "logo": "https://example.com/logo.png",
+                        "favicon": "https://example.com/favicon.ico"
+                    },
+                    "animations": {
+                        "transitionDuration": "200ms",
+                        "easing": "ease-in-out"
+                    },
+                    "layout": {
+                        "grid": {"columns": 12, "maxWidth": "1200px"},
+                        "headerHeight": "64px"
+                    },
+                    "tone": {
+                        "voice": "professional",
+                        "emojiUsage": "minimal"
+                    },
+                    "personality": {
+                        "tone": "professional",
+                        "energy": "medium",
+                        "targetAudience": "developers"
+                    }
+                }
+            }
+        }
+
+        mock_client = Mock()
+        mock_client.post.return_value = mock_response
+
+        result = scrape(mock_client, "https://example.com", ScrapeOptions(formats=["branding"]))
+
+        assert result.branding is not None
+        assert result.branding.color_scheme == "light"
+        assert result.branding.logo == "https://example.com/logo.png"
+        assert len(result.branding.fonts) == 2
+        assert result.branding.typography["fontStacks"]["body"] == ["Inter", "sans-serif"]
+        assert result.branding.spacing["padding"] == {"sm": 8, "md": 16}
+        assert result.branding.icons["style"] == "outline"
+        assert result.branding.images["favicon"] == "https://example.com/favicon.ico"
+        assert result.branding.animations["easing"] == "ease-in-out"
+        assert result.branding.layout["grid"]["columns"] == 12
+        assert result.branding.personality["tone"] == "professional"
+
+    def test_branding_colorscheme_normalization(self):
+        """Test that colorScheme is normalized to color_scheme."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "data": {
+                "branding": {
+                    "colorScheme": "dark",
+                    "colors": {"primary": "#000000"}
+                }
+            }
+        }
+
+        mock_client = Mock()
+        mock_client.post.return_value = mock_response
+
+        result = scrape(mock_client, "https://example.com", ScrapeOptions(formats=["branding"]))
+
+        assert result.branding is not None
+        assert result.branding.color_scheme == "dark"
+        assert not hasattr(result.branding, "colorScheme")
firecrawl/__tests__/unit/v2/methods/test_crawl_params.py
@@ -0,0 +1,70 @@
+"""
+Unit tests for crawl params functionality in Firecrawl v2 SDK.
+"""
+
+import pytest
+from firecrawl.v2.types import CrawlParamsRequest, CrawlParamsData
+
+
+class TestCrawlParamsRequest:
+    """Unit tests for CrawlParamsRequest."""
+
+    def test_crawl_params_request_creation(self):
+        """Test creating CrawlParamsRequest with valid data."""
+        request = CrawlParamsRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+
+        assert request.url == "https://example.com"
+        assert request.prompt == "Extract all blog posts"
+
+    def test_crawl_params_request_serialization(self):
+        """Test that CrawlParamsRequest serializes correctly."""
+        request = CrawlParamsRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts and documentation"
+        )
+
+        data = request.model_dump()
+
+        assert data["url"] == "https://example.com"
+        assert data["prompt"] == "Extract all blog posts and documentation"
+
+
+class TestCrawlParamsData:
+    """Unit tests for CrawlParamsData."""
+
+    def test_crawl_params_data_creation(self):
+        """Test creating CrawlParamsData with minimal data."""
+        data = CrawlParamsData()
+
+        assert data.include_paths is None
+        assert data.exclude_paths is None
+        assert data.max_discovery_depth is None
+        assert data.ignore_sitemap is False
+        assert data.limit is None
+        assert data.crawl_entire_domain is False
+        assert data.allow_external_links is False
+        assert data.scrape_options is None
+        assert data.warning is None
+
+    def test_crawl_params_data_with_values(self):
+        """Test creating CrawlParamsData with values."""
+        data = CrawlParamsData(
+            include_paths=["/blog/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=3,
+            limit=50,
+            crawl_entire_domain=True,
+            allow_external_links=False,
+            warning="Test warning"
+        )
+
+        assert data.include_paths == ["/blog/*"]
+        assert data.exclude_paths == ["/admin/*"]
+        assert data.max_discovery_depth == 3
+        assert data.limit == 50
+        assert data.crawl_entire_domain is True
+        assert data.allow_external_links is False
+        assert data.warning == "Test warning"
firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py
@@ -0,0 +1,240 @@
+import pytest
+from firecrawl.v2.types import CrawlRequest, ScrapeOptions
+from firecrawl.v2.methods.crawl import _prepare_crawl_request
+
+
+class TestCrawlRequestPreparation:
+    """Unit tests for crawl request preparation."""
+
+    def test_basic_request_preparation(self):
+        """Test basic request preparation with minimal fields."""
+        request = CrawlRequest(url="https://example.com")
+        data = _prepare_crawl_request(request)
+
+        # Check basic fields
+        assert data["url"] == "https://example.com"
+
+        # Check that no options are present
+        assert "limit" not in data
+        assert "prompt" not in data
+
+    def test_crawl_options_conversion(self):
+        """Test that CrawlOptions fields are converted to camelCase."""
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=10,
+            max_discovery_depth=3,
+            sitemap="skip",
+            crawl_entire_domain=False,
+            allow_external_links=True
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check basic field
+        assert data["url"] == "https://example.com"
+
+        # Check snake_case to camelCase conversions
+        assert "limit" in data
+        assert data["limit"] == 10
+        assert "maxDiscoveryDepth" in data
+        assert data["maxDiscoveryDepth"] == 3
+        assert "sitemap" in data
+        assert data["sitemap"] == "skip"
+        assert "crawlEntireDomain" in data
+        assert data["crawlEntireDomain"] is False
+        assert "allowExternalLinks" in data
+        assert data["allowExternalLinks"] is True
+
+        # Check that snake_case fields are not present
+        assert "ignore_sitemap" not in data
+        assert "crawl_entire_domain" not in data
+        assert "allow_external_links" not in data
+
+    def test_scrape_options_conversion(self):
+        """Test that nested ScrapeOptions are converted to camelCase."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown", "html"],
+            headers={"User-Agent": "Test"},
+            include_tags=["h1", "h2"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            timeout=15000,
+            wait_for=2000,
+            mobile=True,
+            skip_tls_verification=True,
+            remove_base64_images=False
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        assert "scrapeOptions" in data
+        assert "scrape_options" not in data
+
+        # Check nested conversions
+        scrape_data = data["scrapeOptions"]
+        assert "includeTags" in scrape_data
+        assert scrape_data["includeTags"] == ["h1", "h2"]
+        assert "excludeTags" in scrape_data
+        assert scrape_data["excludeTags"] == ["nav"]
+        assert "onlyMainContent" in scrape_data
+        assert scrape_data["onlyMainContent"] is False
+        assert "waitFor" in scrape_data
+        assert scrape_data["waitFor"] == 2000
+        assert "skipTlsVerification" in scrape_data
+        assert scrape_data["skipTlsVerification"] is True
+        assert "removeBase64Images" in scrape_data
+        assert scrape_data["removeBase64Images"] is False
+
+    def test_all_fields_conversion(self):
+        """Test request preparation with all possible fields."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown"],
+            headers={"User-Agent": "Test"},
+            only_main_content=False,
+            mobile=True
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts and documentation",
+            include_paths=["/blog/*", "/docs/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=3,
+            sitemap="include",
+            limit=100,
+            crawl_entire_domain=True,
+            allow_external_links=False,
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check basic fields
+        assert data["url"] == "https://example.com"
+        assert data["prompt"] == "Extract all blog posts and documentation"
+
+        # Check all CrawlOptions fields
+        assert "includePaths" in data
+        assert data["includePaths"] == ["/blog/*", "/docs/*"]
+        assert "excludePaths" in data
+        assert data["excludePaths"] == ["/admin/*"]
+        assert "maxDiscoveryDepth" in data
+        assert data["maxDiscoveryDepth"] == 3
+        assert "sitemap" in data
+        assert data["sitemap"] == "include"
+        assert "limit" in data
+        assert data["limit"] == 100
+        assert "crawlEntireDomain" in data
+        assert data["crawlEntireDomain"] is True
+        assert "allowExternalLinks" in data
+        assert data["allowExternalLinks"] is False
+
+        # Check nested scrape options
+        assert "scrapeOptions" in data
+        scrape_data = data["scrapeOptions"]
+        assert "onlyMainContent" in scrape_data
+        assert scrape_data["onlyMainContent"] is False
+        assert "mobile" in scrape_data
+        assert scrape_data["mobile"] is True
+
+    def test_none_values_handling(self):
+        """Test that None values are handled correctly."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt=None,
+            limit=None,
+            scrape_options=None
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Only the required field should be present
+        assert "url" in data
+        assert len(data) == 1  # Only url should be present
+
+    def test_prompt_parameter(self):
+        """Test that prompt parameter is included when provided."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+
+        data = _prepare_crawl_request(request)
+
+        assert "url" in data
+        assert "prompt" in data
+        assert data["prompt"] == "Extract all blog posts"
+
+    def test_empty_options(self):
+        """Test that empty options are handled correctly."""
+        request = CrawlRequest(
+            url="https://example.com"
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Should only have the required url field
+        assert "url" in data
+        assert len(data) == 1  # Only url should be present
+
+    def test_validation_integration(self):
+        """Test that validation is called during preparation."""
+        # This should raise an error due to validation
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url="")
+            _prepare_crawl_request(request)
+
+        # This should raise an error due to validation
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=0
+            )
+            _prepare_crawl_request(request)
+
+    def test_scrape_options_shared_function_integration(self):
+        """Test that the shared prepare_scrape_options function is being used."""
+        # Test with all snake_case fields to ensure conversion
+        scrape_opts = ScrapeOptions(
+            include_tags=["h1", "h2"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            wait_for=2000,
+            skip_tls_verification=True,
+            remove_base64_images=False
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check that scrapeOptions is present and converted
+        assert "scrapeOptions" in data
+        scrape_data = data["scrapeOptions"]
+
+        # Check all conversions are working
+        assert "includeTags" in scrape_data
+        assert "excludeTags" in scrape_data
+        assert "onlyMainContent" in scrape_data
+        assert "waitFor" in scrape_data
+        assert "skipTlsVerification" in scrape_data
+        assert "removeBase64Images" in scrape_data
+
+        # Check that snake_case fields are not present
+        assert "include_tags" not in scrape_data
+        assert "exclude_tags" not in scrape_data
+        assert "only_main_content" not in scrape_data
+        assert "wait_for" not in scrape_data
+        assert "skip_tls_verification" not in scrape_data
+        assert "remove_base64_images" not in scrape_data
+        assert "raw_html" not in scrape_data
+        assert "screenshot_full_page" not in scrape_data
firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py
@@ -0,0 +1,107 @@
+import pytest
+from firecrawl.v2.types import CrawlRequest, ScrapeOptions
+from firecrawl.v2.methods.crawl import _validate_crawl_request
+
+
+class TestCrawlRequestValidation:
+    """Unit tests for crawl request validation."""
+
+    def test_validate_empty_url(self):
+        """Test validation with empty URL."""
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url="")
+            _validate_crawl_request(request)
+
+    def test_validate_whitespace_url(self):
+        """Test validation with whitespace-only URL."""
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url=" ")
+            _validate_crawl_request(request)
+
+    def test_validate_valid_url(self):
+        """Test validation with valid URL."""
+        request = CrawlRequest(url="https://example.com")
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_invalid_limit(self):
+        """Test validation with invalid limit."""
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=0
+            )
+            _validate_crawl_request(request)
+
+    def test_validate_negative_limit(self):
+        """Test validation with negative limit."""
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=-5
+            )
+            _validate_crawl_request(request)
+
+    def test_validate_valid_limit(self):
+        """Test validation with valid limit."""
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=10
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_with_prompt(self):
+        """Test validation with prompt."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_with_prompt_and_options(self):
+        """Test validation with prompt and options."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts",
+            limit=10
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_none_options(self):
+        """Test validation with None options."""
+        request = CrawlRequest(url="https://example.com")
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_complex_options(self):
+        """Test validation with complex options."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown"],
+            only_main_content=False,
+            mobile=True
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=50,
+            max_discovery_depth=3,
+            scrape_options=scrape_opts
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_scrape_options_integration(self):
+        """Test that scrape_options validation is integrated."""
+        # Test with valid scrape options
+        scrape_opts = ScrapeOptions(formats=["markdown"], timeout=30000)
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+        # Test with invalid scrape options (should raise error)
+        invalid_scrape_opts = ScrapeOptions(timeout=-1000)
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=invalid_scrape_opts
+        )
+        with pytest.raises(ValueError, match="Timeout must be positive"):
+            _validate_crawl_request(request)
firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py
@@ -0,0 +1,54 @@
+import pytest
+from firecrawl.v2.types import MapOptions
+from firecrawl.v2.methods.map import _prepare_map_request
+
+
+class TestMapRequestPreparation:
+    """Unit tests for map request preparation."""
+
+    def test_basic_request_preparation(self):
+        data = _prepare_map_request("https://example.com")
+        assert data["url"] == "https://example.com"
+        # Default sitemap handling should be "include" when no flags provided
+        assert "sitemap" not in data  # we only send when options provided
+
+    def test_sitemap_transformations(self):
+        # sitemap -> "only"
+        opts = MapOptions(sitemap="only")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "only"
+
+        # sitemap -> "skip"
+        opts = MapOptions(sitemap="skip")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "skip"
+
+        # default when options present but sitemap left as default -> include
+        opts = MapOptions(search="docs")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "include"
+
+    def test_field_conversions(self):
+        opts = MapOptions(
+            search="docs",
+            include_subdomains=True,
+            limit=25,
+            sitemap="only",
+            timeout=15000,
+            integration=" _unit-test ",
+        )
+        data = _prepare_map_request("https://example.com", opts)
+
+        assert data["url"] == "https://example.com"
+        assert data["search"] == "docs"
+        assert data["includeSubdomains"] is True
+        assert data["limit"] == 25
+        assert data["sitemap"] == "only"
+        assert data["timeout"] == 15000
+        assert data["integration"] == "_unit-test"
+
+    def test_invalid_url(self):
+        with pytest.raises(ValueError):
+            _prepare_map_request("")
+        with pytest.raises(ValueError):
+            _prepare_map_request(" ")