firecrawl-py 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl-py might be problematic.

Files changed (84)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/__tests__/e2e/v2/test_scrape.py +37 -1
  3. firecrawl/client.py +8 -4
  4. firecrawl/v2/types.py +19 -2
  5. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/METADATA +7 -3
  6. firecrawl_py-3.3.3.dist-info/RECORD +79 -0
  7. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/WHEEL +1 -1
  8. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info/licenses}/LICENSE +0 -0
  9. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/top_level.txt +0 -2
  10. build/lib/firecrawl/__init__.py +0 -87
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  15. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  16. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  17. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  18. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  19. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  20. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  21. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  22. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  23. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  24. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  25. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  26. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  27. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  28. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  33. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  34. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  35. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  36. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  43. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  44. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  45. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  46. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  47. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  48. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  49. build/lib/firecrawl/client.py +0 -242
  50. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  51. build/lib/firecrawl/types.py +0 -161
  52. build/lib/firecrawl/v1/__init__.py +0 -14
  53. build/lib/firecrawl/v1/client.py +0 -4653
  54. build/lib/firecrawl/v2/__init__.py +0 -4
  55. build/lib/firecrawl/v2/client.py +0 -805
  56. build/lib/firecrawl/v2/client_async.py +0 -250
  57. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  58. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  59. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  60. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  61. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  62. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  63. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  64. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  65. build/lib/firecrawl/v2/methods/batch.py +0 -417
  66. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  67. build/lib/firecrawl/v2/methods/extract.py +0 -131
  68. build/lib/firecrawl/v2/methods/map.py +0 -77
  69. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  70. build/lib/firecrawl/v2/methods/search.py +0 -197
  71. build/lib/firecrawl/v2/methods/usage.py +0 -41
  72. build/lib/firecrawl/v2/types.py +0 -665
  73. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  74. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  75. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  76. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  77. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  78. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  79. build/lib/firecrawl/v2/utils/validation.py +0 -324
  80. build/lib/firecrawl/v2/watcher.py +0 -301
  81. build/lib/firecrawl/v2/watcher_async.py +0 -242
  82. build/lib/tests/test_change_tracking.py +0 -98
  83. build/lib/tests/test_timeout_conversion.py +0 -117
  84. firecrawl_py-3.3.1.dist-info/RECORD +0 -153
build/lib/firecrawl/__tests__/e2e/v2/test_search.py
@@ -1,269 +0,0 @@
- from firecrawl import Firecrawl
- import os
- from dotenv import load_dotenv
- from firecrawl.types import SearchData, Document, ScrapeOptions, SearchResultWeb, SearchResultNews, SearchResultImages
-
- load_dotenv()
-
- firecrawl = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-
- def _collect_texts(entries):
-     texts = []
-     for r in entries or []:
-         title = getattr(r, 'title', None) if hasattr(r, 'title') else None
-         desc = getattr(r, 'description', None) if hasattr(r, 'description') else None
-         if title:
-             texts.append(str(title).lower())
-         if desc:
-             texts.append(str(desc).lower())
-     return texts
-
- def _is_document(entry) -> bool:
-     try:
-         from firecrawl.v2.types import Document
-         return isinstance(entry, Document) or \
-             hasattr(entry, 'markdown') or \
-             hasattr(entry, 'html') or \
-             hasattr(entry, 'raw_html') or \
-             hasattr(entry, 'json') or \
-             hasattr(entry, 'screenshot') or \
-             hasattr(entry, 'change_tracking') or \
-             hasattr(entry, 'summary')
-     except Exception:
-         return hasattr(entry, 'markdown') or \
-             hasattr(entry, 'html') or \
-             hasattr(entry, 'raw_html') or \
-             hasattr(entry, 'json') or \
-             hasattr(entry, 'screenshot') or \
-             hasattr(entry, 'change_tracking') or \
-             hasattr(entry, 'summary')
-
- def test_search_minimal_request():
-     results = firecrawl.search(
-         query="What is the capital of France?"
-     )
-
-     assert isinstance(results, SearchData)
-     assert hasattr(results, 'web')
-     assert results.web is not None
-     assert len(results.web) > 0
-     assert hasattr(results, 'news')
-     assert results.news is None
-     assert hasattr(results, 'images')
-     assert results.images is None
-
-     for result in results.web:
-         assert isinstance(result, SearchResultWeb)
-         assert hasattr(result, 'url')
-         assert hasattr(result, 'title')
-         assert hasattr(result, 'description')
-         assert result.url.startswith('http')
-         assert result.title is not None
-         assert result.description is not None
-
-     all_text = ' '.join(_collect_texts(results.web))
-
-     assert 'paris' in all_text
-
-     assert results.news is None
-     assert results.images is None
-
-
- def test_search_with_sources():
-     """Test search with specific sources."""
-     results = firecrawl.search(
-         query="firecrawl",
-         sources=["web", "news", "images"],
-         limit=3
-     )
-
-     assert isinstance(results, SearchData)
-
-     assert results.web is not None
-     assert len(results.web) <= 3
-     assert isinstance(results.web[0], SearchResultWeb)
-
-     if results.news is not None:
-         assert len(results.news) <= 3
-         assert isinstance(results.news[0], SearchResultNews)
-
-     if results.images is not None:
-         assert len(results.images) <= 3
-         assert isinstance(results.images[0], SearchResultImages)
-
-     web_titles = [result.title.lower() for result in results.web]
-     web_descriptions = [result.description.lower() for result in results.web]
-     all_web_text = ' '.join(web_titles + web_descriptions)
-
-     assert 'firecrawl' in all_web_text
-
- def test_search_result_structure():
-     """Test that SearchResult objects have the correct structure."""
-     results = firecrawl.search(
-         query="test query",
-         limit=1
-     )
-
-     if results.web and len(results.web) > 0:
-         result = results.web[0]
-
-         assert hasattr(result, 'url')
-         assert hasattr(result, 'title')
-         assert hasattr(result, 'description')
-
-         assert isinstance(result.url, str)
-         assert isinstance(result.title, str) or result.title is None
-         assert isinstance(result.description, str) or result.description is None
-
-         # Test URL format
-         assert result.url.startswith('http')
-
- def test_search_all_parameters():
-     """Test search with all available parameters (comprehensive e2e test)."""
-     from firecrawl.types import ScrapeOptions, JsonFormat, Location, WaitAction
-
-     # Define a schema for JSON extraction
-     schema = {
-         "type": "object",
-         "properties": {
-             "title": {"type": "string"},
-             "description": {"type": "string"},
-             "url": {"type": "string"}
-         },
-         "required": ["title", "description"]
-     }
-
-     results = firecrawl.search(
-         query="artificial intelligence",
-         sources=[
-             {"type": "web"},
-             {"type": "news"}
-         ],
-         limit=3,
-         tbs="qdr:m", # Last month
-         location="US",
-         ignore_invalid_urls=True,
-         timeout=60000,
-         scrape_options=ScrapeOptions(
-             formats=[
-                 "markdown",
-                 "html",
-                 {
-                     "type": "json",
-                     "prompt": "Extract the title and description from the page",
-                     "schema": schema
-                 },
-                 {"type": "summary"}
-             ],
-             headers={"User-Agent": "Firecrawl-Test/1.0"},
-             include_tags=["h1", "h2", "p"],
-             exclude_tags=["nav", "footer"],
-             only_main_content=True,
-             wait_for=2000,
-             mobile=False,
-             skip_tls_verification=False,
-             remove_base64_images=True,
-             block_ads=True,
-             proxy="basic",
-             max_age=3600000, # 1 hour cache
-             store_in_cache=True,
-             location=Location(
-                 country="US",
-                 languages=["en"]
-             ),
-             actions=[
-                 WaitAction(milliseconds=1000)
-             ]
-             # Note: raw_html and screenshot_full_page are not supported by v2 API yet
-         )
-     )
-
-     # Test structure
-     assert isinstance(results, SearchData)
-     assert hasattr(results, 'web')
-     assert hasattr(results, 'news')
-     assert hasattr(results, 'images')
-
-     # Test that web results exist
-     assert results.web is not None
-     assert len(results.web) <= 3 # Should respect limit
-
-     # Test that results contain expected content for non-document entries only
-     non_doc_entries = [r for r in (results.web or []) if not _is_document(r)]
-     if non_doc_entries:
-         all_web_text = ' '.join(_collect_texts(non_doc_entries))
-         ai_terms = ['artificial', 'intelligence', 'ai', 'machine', 'learning']
-         assert any(term in all_web_text for term in ai_terms)
-
-     # Test that each result has proper structure
-     for result in results.web:
-         assert isinstance(result, (SearchResultWeb, Document))
-         if isinstance(result, Document):
-             # Document path: ensure content present
-             assert (result.markdown is not None) or (result.html is not None)
-         else:
-             # LinkResult path
-             assert hasattr(result, 'url')
-             assert isinstance(result.url, str) and result.url.startswith('http')
-
-     # Test that news results exist (if API supports it)
-     if results.news is not None:
-         assert len(results.news) <= 3
-         for result in results.news:
-             assert isinstance(result, (SearchResultNews, Document))
-             if isinstance(result, Document):
-                 assert (result.markdown is not None) or (result.html is not None)
-             else:
-                 assert hasattr(result, 'url')
-                 assert isinstance(result.url, str) and result.url.startswith('http')
-
-     # Test that unspecified sources are None
-     assert results.images is None
-
-
- def test_search_formats_flexibility():
-     """Test that both list and ScrapeFormats work for formats."""
-     from firecrawl.types import ScrapeFormats
-
-     # Test with list format
-     results1 = firecrawl.search(
-         query="python programming",
-         limit=1,
-         scrape_options=ScrapeOptions(
-             formats=["markdown"]
-         )
-     )
-
-     # Test with ScrapeFormats object
-     results2 = firecrawl.search(
-         query="python programming",
-         limit=1,
-         scrape_options=ScrapeOptions(
-             formats=ScrapeFormats(markdown=True)
-         )
-     )
-
-     # Both should work without errors
-     assert isinstance(results1, SearchData)
-     assert isinstance(results2, SearchData)
-     assert results1.web is not None
-     assert results2.web is not None
-
- def test_search_with_json_format_object():
-     """Search with scrape_options including a JSON format object (prompt + schema)."""
-     json_schema = {
-         "type": "object",
-         "properties": {
-             "title": {"type": "string"}
-         },
-         "required": ["title"],
-     }
-     results = firecrawl.search(
-         query="site:docs.firecrawl.dev",
-         limit=1,
-         scrape_options=ScrapeOptions(
-             formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}]
-         ),
-     )
-     assert isinstance(results, SearchData)
-     assert results.web is not None and len(results.web) >= 0
build/lib/firecrawl/__tests__/e2e/v2/test_usage.py
@@ -1,26 +0,0 @@
- import os
- from dotenv import load_dotenv
- from firecrawl import Firecrawl
-
- load_dotenv()
-
-
- class TestUsageE2E:
-     def setup_method(self):
-         # Environment is exported by conftest at import time
-         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-
-     def test_get_concurrency(self):
-         resp = self.client.get_concurrency()
-         # Shape assertions (endpoint not live yet, but types are defined)
-         assert hasattr(resp, "concurrency")
-         assert hasattr(resp, "max_concurrency")
-
-     def test_get_credit_usage(self):
-         resp = self.client.get_credit_usage()
-         assert hasattr(resp, "remaining_credits")
-
-     def test_get_token_usage(self):
-         resp = self.client.get_token_usage()
-         assert hasattr(resp, "remaining_tokens")
-
build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py
@@ -1,65 +0,0 @@
- import os
- import time
- from dotenv import load_dotenv
- from firecrawl import Firecrawl
-
- load_dotenv()
-
- if not os.getenv("API_KEY"):
-     raise ValueError("API_KEY is not set")
-
- if not os.getenv("API_URL"):
-     raise ValueError("API_URL is not set")
-
-
- class TestWatcherE2E:
-     def setup_method(self):
-         from firecrawl import Firecrawl
-         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-
-     def test_crawl_watcher(self):
-         # Start a small crawl job
-         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=2)
-         job_id = start_job.id
-
-         statuses = []
-         w = self.client.watcher(job_id, kind="crawl", poll_interval=1, timeout=120)
-         w.add_listener(lambda s: statuses.append(s.status))
-         w.start()
-
-         # Wait for terminal state up to 180 seconds
-         deadline = time.time() + 180
-         while time.time() < deadline:
-             if statuses and statuses[-1] in ["completed", "failed"]:
-                 break
-             time.sleep(1)
-
-         w.stop()
-
-         assert len(statuses) > 0
-         assert statuses[-1] in ["completed", "failed"]
-
-     def test_batch_watcher(self):
-         urls = [
-             "https://docs.firecrawl.dev",
-             "https://firecrawl.dev",
-         ]
-         start_resp = self.client.start_batch_scrape(urls, formats=["markdown"], max_concurrency=1)
-         job_id = start_resp.id
-
-         statuses = []
-         w = self.client.watcher(job_id, kind="batch", poll_interval=1, timeout=180)
-         w.add_listener(lambda s: statuses.append(s.status))
-         w.start()
-
-         deadline = time.time() + 240
-         while time.time() < deadline:
-             if statuses and statuses[-1] in ["completed", "failed", "cancelled"]:
-                 break
-             time.sleep(1)
-
-         w.stop()
-
-         assert len(statuses) > 0
-         assert statuses[-1] in ["completed", "failed", "cancelled"]
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py
@@ -1,12 +0,0 @@
- import pytest
- from firecrawl.v2.types import CrawlParamsRequest
- from firecrawl.v2.methods.aio import crawl as aio_crawl
-
-
- @pytest.mark.asyncio
- async def test_crawl_params_request_validation():
-     with pytest.raises(ValueError):
-         await aio_crawl.crawl_params_preview(None, CrawlParamsRequest(url="", prompt="x"))
-     with pytest.raises(ValueError):
-         await aio_crawl.crawl_params_preview(None, CrawlParamsRequest(url="https://x", prompt=""))
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py
@@ -1,61 +0,0 @@
- from firecrawl.v2.types import CrawlRequest, ScrapeOptions, WebhookConfig
- from firecrawl.v2.methods.aio.crawl import _prepare_crawl_request
-
-
- class TestAsyncCrawlRequestPreparation:
-     def test_basic_request(self):
-         req = CrawlRequest(url="https://example.com")
-         payload = _prepare_crawl_request(req)
-         assert payload["url"] == "https://example.com"
-
-     def test_field_mappings(self):
-         req = CrawlRequest(
-             url="https://example.com",
-             include_paths=["/docs/*"],
-             exclude_paths=["/admin/*"],
-             max_discovery_depth=2,
-             sitemap="skip",
-             ignore_query_parameters=True,
-             crawl_entire_domain=True,
-             allow_external_links=False,
-             allow_subdomains=True,
-             max_concurrency=5,
-             zero_data_retention=True,
-         )
-         payload = _prepare_crawl_request(req)
-         assert payload["includePaths"] == ["/docs/*"]
-         assert payload["excludePaths"] == ["/admin/*"]
-         assert payload["maxDiscoveryDepth"] == 2
-         assert payload["sitemap"] == "skip"
-         assert payload["ignoreQueryParameters"] is True
-         assert payload["crawlEntireDomain"] is True
-         assert payload["allowExternalLinks"] is False
-         assert payload["allowSubdomains"] is True
-         assert payload["maxConcurrency"] == 5
-         assert payload["zeroDataRetention"] is True
-
-     def test_webhook_preparation(self):
-         # string webhook
-         req = CrawlRequest(url="https://example.com", webhook="https://example.com/hook")
-         payload = _prepare_crawl_request(req)
-         assert payload["webhook"] == "https://example.com/hook"
-
-         # object webhook
-         req2 = CrawlRequest(url="https://example.com", webhook=WebhookConfig(url="https://x/h", headers={"X": "1"}, events=["completed"]))
-         payload2 = _prepare_crawl_request(req2)
-         assert isinstance(payload2["webhook"], dict)
-         assert payload2["webhook"]["url"] == "https://x/h"
-         assert payload2["webhook"]["headers"] == {"X": "1"}
-
-     def test_webhook_none_values_excluded(self):
-         req = CrawlRequest(
-             url="https://example.com",
-             webhook=WebhookConfig(url="https://example.com/webhook", headers=None, metadata=None, events=None),
-         )
-         payload = _prepare_crawl_request(req)
-         webhook = payload["webhook"]
-         assert webhook["url"] == "https://example.com/webhook"
-         assert "headers" not in webhook
-         assert "metadata" not in webhook
-         assert "events" not in webhook
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py
@@ -1,12 +0,0 @@
- from firecrawl.v2.types import CrawlRequest, ScrapeOptions
- from firecrawl.v2.methods.aio.crawl import _prepare_crawl_request
- import pytest
-
-
- class TestAsyncCrawlValidation:
-     def test_invalid_url(self):
-         with pytest.raises(ValueError):
-             _prepare_crawl_request(CrawlRequest(url=""))
-         with pytest.raises(ValueError):
-             _prepare_crawl_request(CrawlRequest(url=" "))
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py
@@ -1,19 +0,0 @@
- import pytest
- from firecrawl.v2.types import MapOptions
- from firecrawl.v2.methods.aio.map import _prepare_map_request
-
-
- class TestAsyncMapRequestPreparation:
-     def test_basic(self):
-         payload = _prepare_map_request("https://example.com")
-         assert payload["url"] == "https://example.com"
-
-     def test_fields(self):
-         opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000)
-         payload = _prepare_map_request("https://example.com", opts)
-         assert payload["search"] == "docs"
-         assert payload["includeSubdomains"] is True
-         assert payload["limit"] == 10
-         assert payload["sitemap"] == "only"
-         assert payload["timeout"] == 15000
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py
@@ -1,50 +0,0 @@
- import pytest
- from firecrawl.v2.types import ScrapeOptions, Location
- from firecrawl.v2.methods.aio.scrape import _prepare_scrape_request
-
-
- class TestAsyncScrapeRequestPreparation:
-     @pytest.mark.asyncio
-     async def test_basic_request_preparation(self):
-         payload = await _prepare_scrape_request("https://example.com", None)
-         assert payload["url"] == "https://example.com"
-
-     @pytest.mark.asyncio
-     async def test_options_conversion(self):
-         opts = ScrapeOptions(
-             formats=["markdown", {"type": "screenshot", "full_page": True, "quality": 80}],
-             include_tags=["main"],
-             exclude_tags=["nav"],
-             only_main_content=True,
-             wait_for=500,
-             timeout=30000,
-             mobile=True,
-             parsers=["pdf"],
-             location=Location(country="us", languages=["en"]),
-             skip_tls_verification=False,
-             remove_base64_images=False,
-             fast_mode=True,
-             use_mock="test",
-             block_ads=False,
-             proxy="basic",
-             max_age=1000,
-             store_in_cache=False,
-         )
-         payload = await _prepare_scrape_request("https://example.com", opts)
-         assert payload["url"] == "https://example.com"
-         assert isinstance(payload.get("formats"), list) and "markdown" in payload["formats"]
-         assert payload["includeTags"] == ["main"]
-         assert payload["excludeTags"] == ["nav"]
-         assert payload["onlyMainContent"] is True
-         assert payload["waitFor"] == 500
-         assert payload["timeout"] == 30000
-         assert payload["mobile"] is True
-         assert payload["skipTlsVerification"] is False
-         assert payload["removeBase64Images"] is False
-         assert payload["fastMode"] is True
-         assert payload["useMock"] == "test"
-         assert payload["blockAds"] is False
-         assert payload["proxy"] == "basic"
-         assert payload["maxAge"] == 1000
-         assert payload["storeInCache"] is False
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py
@@ -1,63 +0,0 @@
- import pytest
- from firecrawl.v2.types import SearchRequest, ScrapeOptions
- from firecrawl.v2.methods.aio.search import _prepare_search_request
-
-
- class TestAsyncSearchRequestPreparation:
-     def test_basic_request_preparation(self):
-         request = SearchRequest(query="test query")
-         data = _prepare_search_request(request)
-         assert data["query"] == "test query"
-         assert "ignore_invalid_urls" not in data
-         assert "scrape_options" not in data
-
-     def test_all_fields_conversion(self):
-         scrape_opts = ScrapeOptions(
-             formats=["markdown"],
-             headers={"User-Agent": "Test"},
-             include_tags=["h1", "h2"],
-             exclude_tags=["nav"],
-             only_main_content=False,
-             timeout=15000,
-             wait_for=2000,
-             mobile=True,
-             skip_tls_verification=True,
-             remove_base64_images=False,
-         )
-         request = SearchRequest(
-             query="test query",
-             sources=["web", "news"],
-             limit=10,
-             tbs="qdr:w",
-             location="US",
-             ignore_invalid_urls=False,
-             timeout=30000,
-             scrape_options=scrape_opts,
-         )
-         data = _prepare_search_request(request)
-         assert data["ignoreInvalidURLs"] is False
-         assert "scrapeOptions" in data
-
-     def test_exclude_none_behavior(self):
-         request = SearchRequest(
-             query="test",
-             sources=None,
-             limit=None,
-             tbs=None,
-             location=None,
-             ignore_invalid_urls=None,
-             timeout=None,
-             scrape_options=None,
-         )
-         data = _prepare_search_request(request)
-         assert "query" in data
-         assert len(data) == 1
-
-     def test_empty_scrape_options(self):
-         request = SearchRequest(query="test", scrape_options=ScrapeOptions())
-         data = _prepare_search_request(request)
-         assert "scrapeOptions" in data
-         scrape_data = data["scrapeOptions"]
-         assert "onlyMainContent" in scrape_data
-         assert "mobile" in scrape_data
-
build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py
@@ -1,28 +0,0 @@
- from firecrawl.v2.types import ScrapeOptions, Location
- from firecrawl.v2.methods.aio.batch import _prepare as _prepare_batch
-
-
- class TestAsyncBatchRequestPreparation:
-     def test_urls_validation_and_conversion(self):
-         payload = _prepare_batch(["https://example.com", "http://foo.bar"], options=None)
-         assert payload["urls"] == ["https://example.com", "http://foo.bar"]
-
-     def test_options_and_batch_fields(self):
-         opts = ScrapeOptions(formats=["markdown"], only_main_content=True)
-         payload = _prepare_batch(
-             ["https://example.com"],
-             options=opts,
-             webhook="https://hook.example",
-             append_to_id="00000000-0000-0000-0000-000000000000",
-             ignore_invalid_urls=True,
-             max_concurrency=3,
-             zero_data_retention=True,
-             integration="zapier",
-         )
-         assert payload["webhook"] == "https://hook.example"
-         assert payload["appendToId"] == "00000000-0000-0000-0000-000000000000"
-         assert payload["ignoreInvalidURLs"] is True
-         assert payload["maxConcurrency"] == 3
-         assert payload["zeroDataRetention"] is True
-         assert payload["integration"] == "zapier"
-