firecrawl 4.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0
firecrawl/__tests__/unit/v2/methods/test_agent.py
@@ -0,0 +1,367 @@
"""
Unit tests for agent methods with mocked HTTP client.
"""

import pytest
import time
from unittest.mock import Mock, patch
from pydantic import BaseModel, Field
from typing import List, Optional

from firecrawl.v2.methods.agent import (
    start_agent,
    agent,
    get_agent_status,
    cancel_agent,
    wait_agent
)
from firecrawl.v2.types import AgentResponse
from firecrawl.v2.utils.error_handler import BadRequestError


class TestAgentMethods:
    """Unit tests for agent methods with mocked HTTP client."""

    def setup_method(self):
        """Set up test fixtures."""
        self.mock_client = Mock()
        self.job_id = "test-agent-123"

        # Sample agent response
        self.sample_response = {
            "success": True,
            "id": self.job_id,
            "status": "completed",
            "data": {
                "founders": [
                    {"name": "John Doe", "role": "CEO"},
                    {"name": "Jane Smith", "role": "CTO"}
                ]
            },
            "creditsUsed": 10,
            "expiresAt": "2024-01-01T00:00:00Z"
        }

    def test_start_agent_basic(self):
        """Test starting an agent job with basic parameters."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.post.return_value = mock_response

        result = start_agent(
            self.mock_client,
            None,
            prompt="Find information about Firecrawl"
        )

        # Check that post was called with correct endpoint
        self.mock_client.post.assert_called_once()
        call_args = self.mock_client.post.call_args
        assert call_args[0][0] == "/v2/agent"

        # Check request body (second positional argument)
        body = call_args[0][1]
        assert body["prompt"] == "Find information about Firecrawl"
        assert "urls" not in body

        # Check result
        assert isinstance(result, AgentResponse)
        assert result.id == self.job_id
        assert result.status == "processing"

    def test_start_agent_with_urls(self):
        """Test starting an agent job with URLs."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.post.return_value = mock_response

        urls = ["https://example.com", "https://test.com"]
        result = start_agent(
            self.mock_client,
            urls,
            prompt="Extract data"
        )

        call_args = self.mock_client.post.call_args
        body = call_args[0][1]
        assert body["urls"] == urls

    def test_start_agent_with_dict_schema(self):
        """Test starting an agent job with dict schema."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.post.return_value = mock_response

        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"}
            }
        }

        result = start_agent(
            self.mock_client,
            None,
            prompt="Extract data",
            schema=schema
        )

        call_args = self.mock_client.post.call_args
        body = call_args[0][1]
        assert body["schema"] == schema

    def test_start_agent_with_pydantic_schema(self):
        """Test starting an agent job with Pydantic schema."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.post.return_value = mock_response

        class Founder(BaseModel):
            name: str = Field(description="Full name")
            role: Optional[str] = Field(None, description="Role")

        class FoundersSchema(BaseModel):
            founders: List[Founder] = Field(description="List of founders")

        result = start_agent(
            self.mock_client,
            None,
            prompt="Find founders",
            schema=FoundersSchema
        )

        call_args = self.mock_client.post.call_args
        body = call_args[0][1]
        assert "schema" in body
        assert body["schema"]["type"] == "object"
        assert "founders" in body["schema"]["properties"]

    def test_start_agent_with_all_params(self):
        """Test starting an agent job with all parameters."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.post.return_value = mock_response

        schema = {"type": "object"}
        urls = ["https://example.com"]

        result = start_agent(
            self.mock_client,
            urls,
            prompt="Complete test",
            schema=schema,
            integration="test-integration",
            max_credits=50,
            strict_constrain_to_urls=True
        )

        call_args = self.mock_client.post.call_args
        body = call_args[0][1]
        assert body["prompt"] == "Complete test"
        assert body["urls"] == urls
        assert body["schema"] == schema
        assert body["integration"] == "test-integration"
        assert body["maxCredits"] == 50
        assert body["strictConstrainToURLs"] is True

    def test_get_agent_status(self):
        """Test getting agent status."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = self.sample_response

        self.mock_client.get.return_value = mock_response

        result = get_agent_status(self.mock_client, self.job_id)

        # Check that get was called with correct endpoint
        self.mock_client.get.assert_called_once_with(f"/v2/agent/{self.job_id}")

        # Check result
        assert isinstance(result, AgentResponse)
        assert result.id == self.job_id
        assert result.status == "completed"

    def test_cancel_agent(self):
        """Test canceling an agent job."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {"success": True}

        self.mock_client.delete.return_value = mock_response

        result = cancel_agent(self.mock_client, self.job_id)

        # Check that delete was called with correct endpoint
        self.mock_client.delete.assert_called_once_with(f"/v2/agent/{self.job_id}")

        # Check result
        assert result is True

    @patch('time.sleep')
    def test_wait_agent_completed(self, mock_sleep):
        """Test waiting for agent to complete."""
        # First call returns processing, second returns completed
        mock_response_processing = Mock()
        mock_response_processing.ok = True
        mock_response_processing.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        mock_response_completed = Mock()
        mock_response_completed.ok = True
        mock_response_completed.json.return_value = self.sample_response

        self.mock_client.get.side_effect = [
            mock_response_processing,
            mock_response_completed
        ]

        result = wait_agent(self.mock_client, self.job_id, poll_interval=1)

        # Should have called get twice
        assert self.mock_client.get.call_count == 2
        assert result.status == "completed"
        assert mock_sleep.call_count == 1

    @patch('time.sleep')
    def test_wait_agent_timeout(self, mock_sleep):
        """Test waiting for agent with timeout."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        self.mock_client.get.return_value = mock_response

        # Mock time.time to simulate timeout
        with patch('time.time', side_effect=[0, 0, 5]):  # Start at 0, timeout at 5
            result = wait_agent(
                self.mock_client,
                self.job_id,
                poll_interval=1,
                timeout=3  # Timeout after 3 seconds
            )

        # Should return processing status due to timeout
        assert result.status == "processing"

    @patch('time.sleep')
    def test_agent_complete_flow(self, mock_sleep):
        """Test the complete agent flow (start + wait)."""
        # Mock start_agent response
        mock_start_response = Mock()
        mock_start_response.ok = True
        mock_start_response.json.return_value = {
            "success": True,
            "id": self.job_id,
            "status": "processing"
        }

        # Mock get_agent_status responses
        mock_status_response = Mock()
        mock_status_response.ok = True
        mock_status_response.json.return_value = self.sample_response

        self.mock_client.post.return_value = mock_start_response
        self.mock_client.get.return_value = mock_status_response

        result = agent(
            self.mock_client,
            None,
            prompt="Find information",
            poll_interval=1
        )

        # Should have called post once and get once
        assert self.mock_client.post.call_count == 1
        assert self.mock_client.get.call_count == 1

        # Check result
        assert isinstance(result, AgentResponse)
        assert result.status == "completed"
        assert result.data is not None

    def test_agent_immediate_completion(self):
        """Test agent that completes immediately (no job ID)."""
        mock_response = Mock()
        mock_response.ok = True
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }

        self.mock_client.post.return_value = mock_response

        result = agent(
            self.mock_client,
            None,
            prompt="Quick task"
        )

        # Should only call post, not get
        assert self.mock_client.post.call_count == 1
        assert self.mock_client.get.call_count == 0
        assert result.status == "completed"

    def test_start_agent_error_handling(self):
        """Test error handling in start_agent."""
        mock_response = Mock()
        mock_response.ok = False
        mock_response.status_code = 400
        mock_response.text = "Bad Request"
        # Mock response.json() to return error details
        mock_response.json.return_value = {
            "error": "Invalid request",
            "details": "Bad Request"
        }

        self.mock_client.post.return_value = mock_response

        with pytest.raises(BadRequestError) as exc_info:
            start_agent(
                self.mock_client,
                None,
                prompt="Test prompt"
            )

        # Verify the exception has the correct status code
        assert exc_info.value.status_code == 400
        assert "agent" in str(exc_info.value).lower()
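The tests above fix the call pattern for the module-level agent helpers: start_agent(client, urls, prompt=...) POSTs to /v2/agent, get_agent_status and wait_agent poll GET /v2/agent/{id}, and cancel_agent issues a DELETE. A minimal sketch of that flow against a hand-rolled stub client, assuming only the behavior asserted above (the SDK's real HTTP client setup is not shown in this diff):

# Illustrative sketch only, not part of the packaged diff. The stub client
# mirrors the Mock-based responses used in the tests above; real usage would
# pass the SDK's own HTTP client instead.
from unittest.mock import Mock

from firecrawl.v2.methods.agent import start_agent, wait_agent

stub_client = Mock()

started = Mock(ok=True)
started.json.return_value = {"success": True, "id": "job-1", "status": "processing"}
completed = Mock(ok=True)
completed.json.return_value = {
    "success": True,
    "id": "job-1",
    "status": "completed",
    "data": {"summary": "example"},
}
stub_client.post.return_value = started   # start_agent -> POST /v2/agent
stub_client.get.return_value = completed  # wait_agent -> GET /v2/agent/{id}

job = start_agent(stub_client, None, prompt="Summarize https://example.com")
result = wait_agent(stub_client, job.id, poll_interval=1)
print(result.status, result.data)  # completed {'summary': 'example'}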
firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py
@@ -0,0 +1,226 @@
"""
Unit tests for agent request preparation.
"""

import pytest
from pydantic import BaseModel, Field
from typing import List, Optional

from firecrawl.v2.methods.agent import _prepare_agent_request


class TestAgentRequestPreparation:
    """Unit tests for agent request preparation."""

    def test_basic_request_preparation(self):
        """Test basic request preparation with minimal fields."""
        data = _prepare_agent_request(
            None,
            prompt="Find information about Firecrawl"
        )

        assert data["prompt"] == "Find information about Firecrawl"
        assert "urls" not in data
        assert "schema" not in data

    def test_request_with_urls(self):
        """Test request preparation with URLs."""
        urls = ["https://example.com", "https://test.com"]
        data = _prepare_agent_request(
            urls,
            prompt="Extract data from these pages"
        )

        assert data["prompt"] == "Extract data from these pages"
        assert data["urls"] == urls

    def test_request_with_dict_schema(self):
        """Test request preparation with dict schema."""
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"}
            }
        }
        data = _prepare_agent_request(
            None,
            prompt="Extract person data",
            schema=schema
        )

        assert data["prompt"] == "Extract person data"
        assert data["schema"] == schema

    def test_request_with_pydantic_schema(self):
        """Test request preparation with Pydantic BaseModel schema."""
        class Person(BaseModel):
            name: str = Field(description="Person's name")
            age: Optional[int] = Field(None, description="Person's age")

        data = _prepare_agent_request(
            None,
            prompt="Extract person data",
            schema=Person
        )

        assert data["prompt"] == "Extract person data"
        assert "schema" in data
        assert data["schema"]["type"] == "object"
        assert "properties" in data["schema"]
        assert "name" in data["schema"]["properties"]
        assert "age" in data["schema"]["properties"]

    def test_request_with_pydantic_schema_instance(self):
        """Test request preparation with Pydantic model instance."""
        class Person(BaseModel):
            name: str = Field(description="Person's name")
            age: Optional[int] = Field(None, description="Person's age")

        person_instance = Person(name="John", age=30)
        data = _prepare_agent_request(
            None,
            prompt="Extract person data",
            schema=person_instance
        )

        assert data["prompt"] == "Extract person data"
        assert "schema" in data
        # Should use the class schema, not the instance data
        assert data["schema"]["type"] == "object"

    def test_request_with_nested_pydantic_schema(self):
        """Test request preparation with nested Pydantic schema."""
        class Founder(BaseModel):
            name: str = Field(description="Full name of the founder")
            role: Optional[str] = Field(None, description="Role or position")

        class FoundersSchema(BaseModel):
            founders: List[Founder] = Field(description="List of founders")

        data = _prepare_agent_request(
            None,
            prompt="Find the founders",
            schema=FoundersSchema
        )

        assert data["prompt"] == "Find the founders"
        assert "schema" in data
        assert data["schema"]["type"] == "object"
        assert "founders" in data["schema"]["properties"]
        assert data["schema"]["properties"]["founders"]["type"] == "array"

    def test_request_with_integration(self):
        """Test request preparation with integration tag."""
        data = _prepare_agent_request(
            None,
            prompt="Test prompt",
            integration=" test-integration "
        )

        assert data["prompt"] == "Test prompt"
        assert data["integration"] == "test-integration"

    def test_request_with_max_credits(self):
        """Test request preparation with max credits."""
        data = _prepare_agent_request(
            None,
            prompt="Test prompt",
            max_credits=100
        )

        assert data["prompt"] == "Test prompt"
        assert data["maxCredits"] == 100

    def test_request_with_strict_constrain_to_urls(self):
        """Test request preparation with strict_constrain_to_urls."""
        data = _prepare_agent_request(
            ["https://example.com"],
            prompt="Test prompt",
            strict_constrain_to_urls=True
        )

        assert data["prompt"] == "Test prompt"
        assert data["strictConstrainToURLs"] is True

    def test_request_all_fields(self):
        """Test request preparation with all fields."""
        schema = {
            "type": "object",
            "properties": {"test": {"type": "string"}}
        }
        urls = ["https://example.com"]

        data = _prepare_agent_request(
            urls,
            prompt="Complete test",
            schema=schema,
            integration="test-integration",
            max_credits=50,
            strict_constrain_to_urls=True
        )

        assert data["prompt"] == "Complete test"
        assert data["urls"] == urls
        assert data["schema"] == schema
        assert data["integration"] == "test-integration"
        assert data["maxCredits"] == 50
        assert data["strictConstrainToURLs"] is True

    def test_request_with_empty_integration(self):
        """Test that empty integration is not included."""
        data = _prepare_agent_request(
            None,
            prompt="Test prompt",
            integration=" "
        )

        assert "integration" not in data

    def test_request_with_zero_max_credits(self):
        """Test that zero max_credits is not included."""
        data = _prepare_agent_request(
            None,
            prompt="Test prompt",
            max_credits=0
        )

        assert "maxCredits" not in data

    def test_request_with_false_strict_constrain(self):
        """Test that False strict_constrain_to_urls is not included."""
        data = _prepare_agent_request(
            None,
            prompt="Test prompt",
            strict_constrain_to_urls=False
        )

        assert "strictConstrainToURLs" not in data

    def test_request_with_invalid_schema_type_string(self):
        """Test that invalid schema types raise ValueError."""
        with pytest.raises(ValueError, match="Invalid schema type"):
            _prepare_agent_request(
                None,
                prompt="Test prompt",
                schema="invalid_string_schema"
            )

    def test_request_with_invalid_schema_type_int(self):
        """Test that invalid schema types raise ValueError."""
        with pytest.raises(ValueError, match="Invalid schema type"):
            _prepare_agent_request(
                None,
                prompt="Test prompt",
                schema=123
            )

    def test_request_with_invalid_schema_type_list(self):
        """Test that invalid schema types raise ValueError."""
        with pytest.raises(ValueError, match="Invalid schema type"):
            _prepare_agent_request(
                None,
                prompt="Test prompt",
                schema=["not", "a", "valid", "schema"]
            )
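The preparation tests also pin down the wire format: snake_case keyword arguments surface as camelCase keys, Pydantic models are converted to their JSON Schema, and blank, zero, or False optional values are dropped. A minimal sketch of the resulting body, assuming the helper behaves exactly as asserted above (the Company model is illustrative, not a type shipped with the SDK):

# Illustrative sketch only, not part of the packaged diff.
import json
from typing import Optional

from pydantic import BaseModel, Field

from firecrawl.v2.methods.agent import _prepare_agent_request


class Company(BaseModel):
    name: str = Field(description="Company name")
    ceo: Optional[str] = Field(None, description="Current CEO, if known")


body = _prepare_agent_request(
    ["https://example.com"],
    prompt="Who runs this company?",
    schema=Company,
    max_credits=25,
    strict_constrain_to_urls=True,
)
print(json.dumps(body, indent=2))
# Per the assertions above: "schema" holds the JSON Schema for Company, and the
# snake_case kwargs appear as "maxCredits" and "strictConstrainToURLs".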
firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py
@@ -0,0 +1,90 @@
import pytest
from firecrawl.v2.types import ScrapeOptions, Location, WebhookConfig
from firecrawl.v2.methods.batch import prepare_batch_scrape_request


class TestBatchScrapeRequestPreparation:
    """Unit tests for batch scrape request preparation."""

    def test_urls_validation(self):
        # empty list
        with pytest.raises(ValueError):
            prepare_batch_scrape_request([])
        # invalid protocol
        with pytest.raises(ValueError):
            prepare_batch_scrape_request(["example.com"])  # missing http(s)
        # valid
        data = prepare_batch_scrape_request(["https://example.com", "http://foo.bar"])
        assert data["urls"] == ["https://example.com", "http://foo.bar"]

    def test_flatten_scrape_options(self):
        opts = ScrapeOptions(
            formats=["markdown", "change_tracking", {"type": "screenshot", "full_page": True, "quality": 80}],
            include_tags=["main"],
            exclude_tags=["nav"],
            only_main_content=True,
            wait_for=500,
            timeout=30000,
            mobile=True,
            parsers=["pdf"],
            actions=[{"type": "screenshot", "full_page": True}],
            location=Location(country="us", languages=["en"]),
            skip_tls_verification=False,
            remove_base64_images=False,
            fast_mode=True,
            use_mock="test",
            block_ads=False,
            proxy="basic",
            max_age=1000,
            store_in_cache=False,
        )
        data = prepare_batch_scrape_request(["https://example.com"], options=opts)

        # Formats should be at top-level as list, with screenshot normalized to object w/ fullPage
        assert isinstance(data.get("formats"), list)
        assert "markdown" in data["formats"]
        # snake_case format should be converted to camelCase
        assert "changeTracking" in data["formats"]
        found_obj = next((f for f in data["formats"] if isinstance(f, dict) and f.get("type") == "screenshot"), None)
        assert found_obj is not None and found_obj.get("fullPage") is True and found_obj.get("quality") == 80

        # Field conversions to camelCase
        assert data["includeTags"] == ["main"]
        assert data["excludeTags"] == ["nav"]
        assert data["onlyMainContent"] is True
        assert data["waitFor"] == 500
        assert data["timeout"] == 30000
        assert data["mobile"] is True
        assert data["parsers"] == ["pdf"]
        assert isinstance(data["actions"], list) and data["actions"][0]["type"] == "screenshot"
        assert isinstance(data["location"], dict) and data["location"]["country"] == "us"
        assert data["skipTlsVerification"] is False
        assert data["removeBase64Images"] is False
        assert data["fastMode"] is True
        assert data["useMock"] == "test"
        assert data["blockAds"] is False
        assert data["proxy"] == "basic"
        assert data["maxAge"] == 1000
        assert data["storeInCache"] is False

    def test_batch_specific_fields(self):
        webhook = WebhookConfig(url="https://hook.test", headers={"X": "Y"}, events=["completed"])
        data = prepare_batch_scrape_request(
            ["https://example.com"],
            webhook=webhook,
            append_to_id="00000000-0000-0000-0000-000000000000",
            ignore_invalid_urls=True,
            max_concurrency=5,
            zero_data_retention=True,
            integration="_unit-test",
        )
        assert isinstance(data["webhook"], dict) and data["webhook"]["url"] == "https://hook.test"
        assert data["appendToId"] == "00000000-0000-0000-0000-000000000000"
        assert data["ignoreInvalidURLs"] is True
        assert data["maxConcurrency"] == 5
        assert data["zeroDataRetention"] is True
        assert data["integration"] == "_unit-test"

    def test_string_webhook_is_passed_verbatim(self):
        data = prepare_batch_scrape_request(["https://example.com"], webhook="https://hook.simple")
        assert data["webhook"] == "https://hook.simple"
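The batch tests show prepare_batch_scrape_request validating the URL list (non-empty, http/https only), flattening ScrapeOptions and the batch-specific kwargs into one camelCase payload, and passing a string webhook through verbatim. A minimal usage sketch under those assumptions (URLs and option values are arbitrary examples):

# Illustrative sketch only, not part of the packaged diff. Field names come
# from the tests above; the URLs and option values are invented for the example.
from firecrawl.v2.methods.batch import prepare_batch_scrape_request
from firecrawl.v2.types import ScrapeOptions, WebhookConfig

payload = prepare_batch_scrape_request(
    ["https://example.com/a", "https://example.com/b"],
    options=ScrapeOptions(formats=["markdown"], only_main_content=True, timeout=15000),
    webhook=WebhookConfig(url="https://hooks.example.com/firecrawl", events=["completed"]),
    max_concurrency=2,
)
# Per the assertions above, scrape options are flattened into the top-level
# payload with camelCase keys:
assert payload["onlyMainContent"] is True
assert payload["maxConcurrency"] == 2
assert payload["webhook"]["url"] == "https://hooks.example.com/firecrawl"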