firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,367 @@
1
+ """
2
+ Unit tests for agent methods with mocked HTTP client.
3
+ """
4
+
5
+ import pytest
6
+ import time
7
+ from unittest.mock import Mock, patch
8
+ from pydantic import BaseModel, Field
9
+ from typing import List, Optional
10
+
11
+ from firecrawl.v2.methods.agent import (
12
+ start_agent,
13
+ agent,
14
+ get_agent_status,
15
+ cancel_agent,
16
+ wait_agent
17
+ )
18
+ from firecrawl.v2.types import AgentResponse
19
+ from firecrawl.v2.utils.error_handler import BadRequestError
20
+
21
+
22
class TestAgentMethods:
    """Exercise the v2 agent helpers against a ``Mock`` HTTP client.

    No request leaves the process: each test scripts the return values of the
    client's ``post``, ``get`` or ``delete`` mocks, calls the helper under
    test, and then inspects the recorded calls and the parsed result.
    """

    def setup_method(self):
        """Create a fresh mock client and a canned ``completed`` status body."""
        self.mock_client = Mock()
        self.job_id = "test-agent-123"

        # Status body of a finished job, reused by the status/wait/flow tests.
        founders = [
            {"name": "John Doe", "role": "CEO"},
            {"name": "Jane Smith", "role": "CTO"},
        ]
        self.sample_response = {
            "success": True,
            "id": self.job_id,
            "status": "completed",
            "data": {"founders": founders},
            "creditsUsed": 10,
            "expiresAt": "2024-01-01T00:00:00Z",
        }

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with test)
    # ------------------------------------------------------------------

    @staticmethod
    def _mock_response(payload, ok=True):
        """Return a mocked response whose ``json()`` yields *payload*."""
        response = Mock()
        response.ok = ok
        response.json.return_value = payload
        return response

    def _processing_body(self):
        """Return the body used for a job that is still ``processing``."""
        return {
            "success": True,
            "id": self.job_id,
            "status": "processing",
        }

    def _last_post_body(self):
        """Return the second positional argument of the most recent POST."""
        positional = self.mock_client.post.call_args[0]
        return positional[1]

    # ------------------------------------------------------------------
    # start_agent
    # ------------------------------------------------------------------

    def test_start_agent_basic(self):
        """A prompt-only start posts to ``/v2/agent`` with no ``urls`` key."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )

        started = start_agent(
            self.mock_client,
            None,
            prompt="Find information about Firecrawl",
        )

        # Exactly one POST, aimed at the agent endpoint.
        self.mock_client.post.assert_called_once()
        positional = self.mock_client.post.call_args[0]
        assert positional[0] == "/v2/agent"

        # The request body travels as the second positional argument.
        sent = positional[1]
        assert sent["prompt"] == "Find information about Firecrawl"
        assert "urls" not in sent

        # The parsed result mirrors the mocked payload.
        assert isinstance(started, AgentResponse)
        assert started.id == self.job_id
        assert started.status == "processing"

    def test_start_agent_with_urls(self):
        """URLs handed to ``start_agent`` are forwarded under ``urls``."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )
        urls = ["https://example.com", "https://test.com"]

        start_agent(self.mock_client, urls, prompt="Extract data")

        assert self._last_post_body()["urls"] == urls

    def test_start_agent_with_dict_schema(self):
        """A plain dict schema is sent unchanged under ``schema``."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }

        start_agent(
            self.mock_client,
            None,
            prompt="Extract data",
            schema=schema,
        )

        assert self._last_post_body()["schema"] == schema

    def test_start_agent_with_pydantic_schema(self):
        """A Pydantic model class is sent as an object schema with its fields."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )

        class Founder(BaseModel):
            name: str = Field(description="Full name")
            role: Optional[str] = Field(None, description="Role")

        class FoundersSchema(BaseModel):
            founders: List[Founder] = Field(description="List of founders")

        start_agent(
            self.mock_client,
            None,
            prompt="Find founders",
            schema=FoundersSchema,
        )

        sent = self._last_post_body()
        assert "schema" in sent
        assert sent["schema"]["type"] == "object"
        assert "founders" in sent["schema"]["properties"]

    def test_start_agent_with_all_params(self):
        """Every supported option appears in the body under its camelCase key."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )
        schema = {"type": "object"}
        urls = ["https://example.com"]

        start_agent(
            self.mock_client,
            urls,
            prompt="Complete test",
            schema=schema,
            integration="test-integration",
            max_credits=50,
            strict_constrain_to_urls=True,
        )

        sent = self._last_post_body()
        assert sent["prompt"] == "Complete test"
        assert sent["urls"] == urls
        assert sent["schema"] == schema
        assert sent["integration"] == "test-integration"
        assert sent["maxCredits"] == 50
        assert sent["strictConstrainToURLs"] is True

    # ------------------------------------------------------------------
    # get_agent_status / cancel_agent
    # ------------------------------------------------------------------

    def test_get_agent_status(self):
        """Status lookup GETs ``/v2/agent/<id>`` and parses the response."""
        self.mock_client.get.return_value = self._mock_response(
            self.sample_response
        )

        status = get_agent_status(self.mock_client, self.job_id)

        self.mock_client.get.assert_called_once_with(f"/v2/agent/{self.job_id}")
        assert isinstance(status, AgentResponse)
        assert status.id == self.job_id
        assert status.status == "completed"

    def test_cancel_agent(self):
        """Cancelling DELETEs ``/v2/agent/<id>`` and returns ``True``."""
        self.mock_client.delete.return_value = self._mock_response(
            {"success": True}
        )

        cancelled = cancel_agent(self.mock_client, self.job_id)

        self.mock_client.delete.assert_called_once_with(f"/v2/agent/{self.job_id}")
        assert cancelled is True

    # ------------------------------------------------------------------
    # wait_agent
    # ------------------------------------------------------------------

    @patch('time.sleep')
    def test_wait_agent_completed(self, mock_sleep):
        """Polling stops at the first ``completed`` status after one sleep."""
        # First poll reports "processing", the second reports "completed".
        self.mock_client.get.side_effect = [
            self._mock_response(self._processing_body()),
            self._mock_response(self.sample_response),
        ]

        final = wait_agent(self.mock_client, self.job_id, poll_interval=1)

        assert self.mock_client.get.call_count == 2
        assert final.status == "completed"
        assert mock_sleep.call_count == 1

    @patch('time.sleep')
    def test_wait_agent_timeout(self, mock_sleep):
        """On timeout the last seen (``processing``) status is returned."""
        self.mock_client.get.return_value = self._mock_response(
            self._processing_body()
        )

        # Scripted clock readings 0, 0, 5: the last one is past the 3 s timeout.
        fake_clock = patch('time.time', side_effect=[0, 0, 5])
        with fake_clock:
            final = wait_agent(
                self.mock_client,
                self.job_id,
                poll_interval=1,
                timeout=3,
            )

        assert final.status == "processing"

    # ------------------------------------------------------------------
    # agent (start + wait)
    # ------------------------------------------------------------------

    @patch('time.sleep')
    def test_agent_complete_flow(self, mock_sleep):
        """``agent`` issues one POST to start and one GET to fetch the result."""
        self.mock_client.post.return_value = self._mock_response(
            self._processing_body()
        )
        self.mock_client.get.return_value = self._mock_response(
            self.sample_response
        )

        outcome = agent(
            self.mock_client,
            None,
            prompt="Find information",
            poll_interval=1,
        )

        assert self.mock_client.post.call_count == 1
        assert self.mock_client.get.call_count == 1

        assert isinstance(outcome, AgentResponse)
        assert outcome.status == "completed"
        assert outcome.data is not None

    def test_agent_immediate_completion(self):
        """A start response without a job id is returned with no polling."""
        immediate = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"},
        }
        self.mock_client.post.return_value = self._mock_response(immediate)

        outcome = agent(self.mock_client, None, prompt="Quick task")

        # Only the POST happens; no status GET is issued.
        assert self.mock_client.post.call_count == 1
        assert self.mock_client.get.call_count == 0
        assert outcome.status == "completed"

    # ------------------------------------------------------------------
    # error handling
    # ------------------------------------------------------------------

    def test_start_agent_error_handling(self):
        """A non-OK 400 response surfaces as ``BadRequestError``."""
        failure = self._mock_response(
            {
                "error": "Invalid request",
                "details": "Bad Request",
            },
            ok=False,
        )
        failure.status_code = 400
        failure.text = "Bad Request"
        self.mock_client.post.return_value = failure

        with pytest.raises(BadRequestError) as exc_info:
            start_agent(self.mock_client, None, prompt="Test prompt")

        # The raised error carries the HTTP status and mentions the agent.
        raised = exc_info.value
        assert raised.status_code == 400
        assert "agent" in str(raised).lower()
367
+
@@ -0,0 +1,226 @@
1
+ """
2
+ Unit tests for agent request preparation.
3
+ """
4
+
5
+ import pytest
6
+ from pydantic import BaseModel, Field
7
+ from typing import List, Optional
8
+
9
+ from firecrawl.v2.methods.agent import _prepare_agent_request
10
+
11
+
12
class TestAgentRequestPreparation:
    """Check the request body built by ``_prepare_agent_request``.

    Each test calls the helper directly with one combination of arguments and
    asserts on the keys and values of the returned mapping.
    """

    @staticmethod
    def _expect_invalid_schema(bad_schema):
        """Assert that *bad_schema* is rejected with ``Invalid schema type``."""
        with pytest.raises(ValueError, match="Invalid schema type"):
            _prepare_agent_request(
                None,
                prompt="Test prompt",
                schema=bad_schema,
            )

    def test_basic_request_preparation(self):
        """With only a prompt, neither ``urls`` nor ``schema`` is emitted."""
        payload = _prepare_agent_request(
            None, prompt="Find information about Firecrawl"
        )

        assert payload["prompt"] == "Find information about Firecrawl"
        for absent in ("urls", "schema"):
            assert absent not in payload

    def test_request_with_urls(self):
        """URLs are copied into the body under ``urls``."""
        urls = ["https://example.com", "https://test.com"]

        payload = _prepare_agent_request(
            urls, prompt="Extract data from these pages"
        )

        assert payload["prompt"] == "Extract data from these pages"
        assert payload["urls"] == urls

    def test_request_with_dict_schema(self):
        """A dict schema is passed through unchanged."""
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"},
            },
        }

        payload = _prepare_agent_request(
            None, prompt="Extract person data", schema=schema
        )

        assert payload["prompt"] == "Extract person data"
        assert payload["schema"] == schema

    def test_request_with_pydantic_schema(self):
        """A Pydantic model class becomes an object schema listing its fields."""
        class Person(BaseModel):
            name: str = Field(description="Person's name")
            age: Optional[int] = Field(None, description="Person's age")

        payload = _prepare_agent_request(
            None, prompt="Extract person data", schema=Person
        )

        assert payload["prompt"] == "Extract person data"
        assert "schema" in payload
        generated = payload["schema"]
        assert generated["type"] == "object"
        assert "properties" in generated
        assert "name" in generated["properties"]
        assert "age" in generated["properties"]

    def test_request_with_pydantic_schema_instance(self):
        """A Pydantic model instance is accepted as a schema source."""
        class Person(BaseModel):
            name: str = Field(description="Person's name")
            age: Optional[int] = Field(None, description="Person's age")

        person_instance = Person(name="John", age=30)

        payload = _prepare_agent_request(
            None, prompt="Extract person data", schema=person_instance
        )

        assert payload["prompt"] == "Extract person data"
        assert "schema" in payload
        # Expected to come from the model class rather than the instance
        # values; only the top-level type is asserted here.
        assert payload["schema"]["type"] == "object"

    def test_request_with_nested_pydantic_schema(self):
        """A list-of-models field is described as an ``array`` property."""
        class Founder(BaseModel):
            name: str = Field(description="Full name of the founder")
            role: Optional[str] = Field(None, description="Role or position")

        class FoundersSchema(BaseModel):
            founders: List[Founder] = Field(description="List of founders")

        payload = _prepare_agent_request(
            None, prompt="Find the founders", schema=FoundersSchema
        )

        assert payload["prompt"] == "Find the founders"
        assert "schema" in payload
        generated = payload["schema"]
        assert generated["type"] == "object"
        assert "founders" in generated["properties"]
        assert generated["properties"]["founders"]["type"] == "array"

    def test_request_with_integration(self):
        """Surrounding whitespace is stripped from the integration tag."""
        payload = _prepare_agent_request(
            None, prompt="Test prompt", integration=" test-integration "
        )

        assert payload["prompt"] == "Test prompt"
        assert payload["integration"] == "test-integration"

    def test_request_with_max_credits(self):
        """``max_credits`` is emitted as ``maxCredits``."""
        payload = _prepare_agent_request(
            None, prompt="Test prompt", max_credits=100
        )

        assert payload["prompt"] == "Test prompt"
        assert payload["maxCredits"] == 100

    def test_request_with_strict_constrain_to_urls(self):
        """``strict_constrain_to_urls=True`` is emitted as ``strictConstrainToURLs``."""
        payload = _prepare_agent_request(
            ["https://example.com"],
            prompt="Test prompt",
            strict_constrain_to_urls=True,
        )

        assert payload["prompt"] == "Test prompt"
        assert payload["strictConstrainToURLs"] is True

    def test_request_all_fields(self):
        """All options together land in the body under their wire names."""
        schema = {
            "type": "object",
            "properties": {"test": {"type": "string"}},
        }
        urls = ["https://example.com"]

        payload = _prepare_agent_request(
            urls,
            prompt="Complete test",
            schema=schema,
            integration="test-integration",
            max_credits=50,
            strict_constrain_to_urls=True,
        )

        assert payload["prompt"] == "Complete test"
        assert payload["urls"] == urls
        assert payload["schema"] == schema
        assert payload["integration"] == "test-integration"
        assert payload["maxCredits"] == 50
        assert payload["strictConstrainToURLs"] is True

    def test_request_with_empty_integration(self):
        """A whitespace-only integration tag is left out of the body."""
        payload = _prepare_agent_request(
            None, prompt="Test prompt", integration=" "
        )

        assert "integration" not in payload

    def test_request_with_zero_max_credits(self):
        """``max_credits=0`` is left out of the body."""
        payload = _prepare_agent_request(
            None, prompt="Test prompt", max_credits=0
        )

        assert "maxCredits" not in payload

    def test_request_with_false_strict_constrain(self):
        """``strict_constrain_to_urls=False`` is left out of the body."""
        payload = _prepare_agent_request(
            None, prompt="Test prompt", strict_constrain_to_urls=False
        )

        assert "strictConstrainToURLs" not in payload

    def test_request_with_invalid_schema_type_string(self):
        """A string schema raises ``ValueError``."""
        self._expect_invalid_schema("invalid_string_schema")

    def test_request_with_invalid_schema_type_int(self):
        """An integer schema raises ``ValueError``."""
        self._expect_invalid_schema(123)

    def test_request_with_invalid_schema_type_list(self):
        """A list schema raises ``ValueError``."""
        self._expect_invalid_schema(["not", "a", "valid", "schema"])
226
+
@@ -0,0 +1,90 @@
1
+ import pytest
2
+ from firecrawl.v2.types import ScrapeOptions, Location, WebhookConfig
3
+ from firecrawl.v2.methods.batch import prepare_batch_scrape_request
4
+
5
+
6
class TestBatchScrapeRequestPreparation:
    """Check the request body built by ``prepare_batch_scrape_request``."""

    def test_urls_validation(self):
        """Empty or scheme-less URL lists raise; valid lists pass through."""
        # An empty list and a URL missing http(s) must both be rejected.
        for rejected in ([], ["example.com"]):
            with pytest.raises(ValueError):
                prepare_batch_scrape_request(rejected)

        payload = prepare_batch_scrape_request(
            ["https://example.com", "http://foo.bar"]
        )
        assert payload["urls"] == ["https://example.com", "http://foo.bar"]

    def test_flatten_scrape_options(self):
        """Scrape options are flattened to top-level camelCase keys."""
        settings = {
            "formats": [
                "markdown",
                "change_tracking",
                {"type": "screenshot", "full_page": True, "quality": 80},
            ],
            "include_tags": ["main"],
            "exclude_tags": ["nav"],
            "only_main_content": True,
            "wait_for": 500,
            "timeout": 30000,
            "mobile": True,
            "parsers": ["pdf"],
            "actions": [{"type": "screenshot", "full_page": True}],
            "location": Location(country="us", languages=["en"]),
            "skip_tls_verification": False,
            "remove_base64_images": False,
            "fast_mode": True,
            "use_mock": "test",
            "block_ads": False,
            "proxy": "basic",
            "max_age": 1000,
            "store_in_cache": False,
        }
        payload = prepare_batch_scrape_request(
            ["https://example.com"], options=ScrapeOptions(**settings)
        )

        # Formats stay a top-level list; the snake_case name is camelCased.
        assert isinstance(payload.get("formats"), list)
        formats = payload["formats"]
        assert "markdown" in formats
        assert "changeTracking" in formats

        # The screenshot entry is an object carrying fullPage and quality.
        screenshots = [
            entry
            for entry in formats
            if isinstance(entry, dict) and entry.get("type") == "screenshot"
        ]
        assert screenshots
        first_shot = screenshots[0]
        assert first_shot.get("fullPage") is True
        assert first_shot.get("quality") == 80

        # Boolean options, compared by identity under their camelCase names.
        expected_flags = (
            ("onlyMainContent", True),
            ("mobile", True),
            ("skipTlsVerification", False),
            ("removeBase64Images", False),
            ("fastMode", True),
            ("blockAds", False),
            ("storeInCache", False),
        )
        for key, flag in expected_flags:
            assert payload[key] is flag

        # Remaining scalar and list options, compared by equality.
        expected_values = (
            ("includeTags", ["main"]),
            ("excludeTags", ["nav"]),
            ("waitFor", 500),
            ("timeout", 30000),
            ("parsers", ["pdf"]),
            ("useMock", "test"),
            ("proxy", "basic"),
            ("maxAge", 1000),
        )
        for key, value in expected_values:
            assert payload[key] == value

        # Structured options are serialised to plain containers.
        actions = payload["actions"]
        assert isinstance(actions, list)
        assert actions[0]["type"] == "screenshot"
        location = payload["location"]
        assert isinstance(location, dict)
        assert location["country"] == "us"

    def test_batch_specific_fields(self):
        """Batch-only arguments are emitted under their camelCase keys."""
        hook = WebhookConfig(
            url="https://hook.test",
            headers={"X": "Y"},
            events=["completed"],
        )

        payload = prepare_batch_scrape_request(
            ["https://example.com"],
            webhook=hook,
            append_to_id="00000000-0000-0000-0000-000000000000",
            ignore_invalid_urls=True,
            max_concurrency=5,
            zero_data_retention=True,
            integration="_unit-test",
        )

        sent_hook = payload["webhook"]
        assert isinstance(sent_hook, dict)
        assert sent_hook["url"] == "https://hook.test"
        assert payload["appendToId"] == "00000000-0000-0000-0000-000000000000"
        assert payload["ignoreInvalidURLs"] is True
        assert payload["maxConcurrency"] == 5
        assert payload["zeroDataRetention"] is True
        assert payload["integration"] == "_unit-test"

    def test_string_webhook_is_passed_verbatim(self):
        """A webhook given as a plain URL string is forwarded unchanged."""
        payload = prepare_batch_scrape_request(
            ["https://example.com"], webhook="https://hook.simple"
        )
        assert payload["webhook"] == "https://hook.simple"