firecrawl 2.16.5__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of firecrawl might be problematic.
Files changed (82)
  1. firecrawl/__init__.py +27 -19
  2. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  10. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  11. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  13. firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  14. firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  15. firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  16. firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  17. firecrawl/__tests__/e2e/v2/test_search.py +265 -0
  18. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  19. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  20. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  21. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  28. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  29. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  30. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  31. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  32. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  33. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  34. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  35. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
  36. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  37. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  38. firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  39. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  40. firecrawl/client.py +241 -0
  41. firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
  42. firecrawl/types.py +157 -0
  43. firecrawl/v1/__init__.py +14 -0
  44. firecrawl/v1/client.py +4653 -0
  45. firecrawl/v2/__init__.py +4 -0
  46. firecrawl/v2/client.py +802 -0
  47. firecrawl/v2/client_async.py +250 -0
  48. firecrawl/v2/methods/aio/__init__.py +1 -0
  49. firecrawl/v2/methods/aio/batch.py +85 -0
  50. firecrawl/v2/methods/aio/crawl.py +174 -0
  51. firecrawl/v2/methods/aio/extract.py +126 -0
  52. firecrawl/v2/methods/aio/map.py +59 -0
  53. firecrawl/v2/methods/aio/scrape.py +36 -0
  54. firecrawl/v2/methods/aio/search.py +58 -0
  55. firecrawl/v2/methods/aio/usage.py +42 -0
  56. firecrawl/v2/methods/batch.py +420 -0
  57. firecrawl/v2/methods/crawl.py +468 -0
  58. firecrawl/v2/methods/extract.py +131 -0
  59. firecrawl/v2/methods/map.py +77 -0
  60. firecrawl/v2/methods/scrape.py +68 -0
  61. firecrawl/v2/methods/search.py +173 -0
  62. firecrawl/v2/methods/usage.py +41 -0
  63. firecrawl/v2/types.py +546 -0
  64. firecrawl/v2/utils/__init__.py +9 -0
  65. firecrawl/v2/utils/error_handler.py +107 -0
  66. firecrawl/v2/utils/get_version.py +15 -0
  67. firecrawl/v2/utils/http_client.py +153 -0
  68. firecrawl/v2/utils/http_client_async.py +64 -0
  69. firecrawl/v2/utils/validation.py +324 -0
  70. firecrawl/v2/watcher.py +312 -0
  71. firecrawl/v2/watcher_async.py +245 -0
  72. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
  73. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
  74. firecrawl-3.0.3.dist-info/RECORD +78 -0
  75. tests/test_timeout_conversion.py +117 -0
  76. firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  77. firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  78. firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  79. firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
  80. firecrawl-2.16.5.dist-info/RECORD +0 -12
  81. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
  82. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
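The file listing shows the SDK reorganized into versioned packages: the previous client surface appears to be preserved under `firecrawl/v1/`, while `firecrawl/v2/` adds a sync client, an `AsyncFirecrawlClient`, per-method modules (including async `aio` variants), typed request/response models, and job watchers. As a rough usage sketch of the new async surface, based only on the constructor and coroutines exercised in the test files below (the API key and target URLs are placeholders, not values taken from this diff):

```python
import asyncio

from firecrawl.v2.client_async import AsyncFirecrawlClient


async def main() -> None:
    # Placeholder credentials and endpoint; substitute real values.
    client = AsyncFirecrawlClient(api_key="fc-YOUR-KEY", api_url="https://api.firecrawl.dev")

    # The async client awaits its httpx-based transport directly, so multiple
    # scrapes can overlap on a single event loop (this is what the
    # concurrency tests below assert).
    results = await asyncio.gather(
        client.scrape("https://firecrawl.dev"),
        client.scrape("https://docs.firecrawl.dev"),
    )
    print(results)


asyncio.run(main())
```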
@@ -0,0 +1,117 @@
+import asyncio
+import time
+import httpx
+import pytest
+
+from firecrawl.v2.client_async import AsyncFirecrawlClient
+from firecrawl.v2.utils.http_client_async import AsyncHttpClient
+from firecrawl.v2.utils.http_client import HttpClient
+from firecrawl.v2.methods.aio import batch as aio_batch
+
+
+@pytest.mark.asyncio
+async def test_scrape_concurrency(monkeypatch):
+    async def fake_post(self, endpoint, data, headers=None, timeout=None):
+        await asyncio.sleep(0.1)
+        return httpx.Response(200, json={"success": True, "data": {}})
+
+    monkeypatch.setattr(AsyncHttpClient, "post", fake_post)
+
+    client = AsyncFirecrawlClient(api_key="test", api_url="http://localhost")
+
+    start = time.perf_counter()
+    await asyncio.gather(
+        client.scrape("https://firecrawl.dev"),
+        client.scrape("https://firecrawl.dev"),
+        client.scrape("https://firecrawl.dev")
+    )
+    elapsed = time.perf_counter() - start
+
+    # If calls run concurrently, total should be close to single 0.1s delay, not 0.3s
+    assert elapsed < 0.25
+
+
+@pytest.mark.asyncio
+async def test_event_loop_not_blocked(monkeypatch):
+    ticks = 0
+
+    async def ticker():
+        nonlocal ticks
+        for _ in range(5):
+            await asyncio.sleep(0.05)
+            ticks += 1
+
+    async def fake_post(self, endpoint, data, headers=None, timeout=None):
+        await asyncio.sleep(0.2)
+        return httpx.Response(200, json={"success": True, "data": {}})
+
+    monkeypatch.setattr(AsyncHttpClient, "post", fake_post)
+
+    client = AsyncFirecrawlClient(api_key="test", api_url="http://localhost")
+
+    await asyncio.gather(ticker(), client.scrape("https://a"))
+    # If scrape awaited properly, ticker should have progressed several steps
+    assert ticks >= 3
+
+
+@pytest.mark.asyncio
+async def test_wait_batch_scrape_polling_interval(monkeypatch):
+    # Simulate one scraping status then completed
+    class S:  # simple status holder
+        def __init__(self, status):
+            self.status = status
+
+    states = ["scraping", "completed"]
+
+    async def fake_status(client, job_id):
+        state = states.pop(0)
+        return S(state)
+
+    monkeypatch.setattr(aio_batch, "get_batch_scrape_status", fake_status)
+
+    client = AsyncFirecrawlClient(api_key="test", api_url="http://localhost")
+
+    start = time.perf_counter()
+    await client.wait_batch_scrape("job-1", poll_interval=0.1, timeout=2)
+    elapsed = time.perf_counter() - start
+
+    # Should take roughly one poll interval to reach completed
+    assert 0.09 <= elapsed <= 0.5
+
+
+@pytest.mark.asyncio
+async def test_async_transport_used_no_threads(monkeypatch):
+    # Make any to_thread usage blow up
+    monkeypatch.setattr(asyncio, "to_thread", lambda *a, **k: (_ for _ in ()).throw(RuntimeError("to_thread not allowed")))
+    # Make sync HttpClient unusable
+    def _boom(*a, **k):
+        raise RuntimeError("sync client should not be used")
+    monkeypatch.setattr(HttpClient, "post", _boom)
+    monkeypatch.setattr(HttpClient, "get", _boom)
+    monkeypatch.setattr(HttpClient, "delete", _boom)
+
+    # Track true async concurrency
+    active = 0
+    max_active = 0
+    async def fake_post(self, endpoint, data, headers=None, timeout=None):
+        nonlocal active, max_active
+        active += 1
+        max_active = max(max_active, active)
+        try:
+            await asyncio.sleep(0.1)
+            return httpx.Response(200, json={"success": True, "data": {}})
+        finally:
+            active -= 1
+
+    monkeypatch.setattr(AsyncHttpClient, "post", fake_post)
+
+    client = AsyncFirecrawlClient(api_key="test", api_url="http://localhost")
+
+    await asyncio.gather(
+        client.scrape("https://firecrawl.dev"),
+        client.scrape("https://firecrawl.dev"),
+        client.search("q"),  # uses async search
+    )
+
+    assert max_active >= 2
+
@@ -0,0 +1,90 @@
+import pytest
+from firecrawl.v2.types import ScrapeOptions, Location, WebhookConfig
+from firecrawl.v2.methods.batch import prepare_batch_scrape_request
+
+
+class TestBatchScrapeRequestPreparation:
+    """Unit tests for batch scrape request preparation."""
+
+    def test_urls_validation(self):
+        # empty list
+        with pytest.raises(ValueError):
+            prepare_batch_scrape_request([])
+        # invalid protocol
+        with pytest.raises(ValueError):
+            prepare_batch_scrape_request(["example.com"])  # missing http(s)
+        # valid
+        data = prepare_batch_scrape_request(["https://example.com", "http://foo.bar"])
+        assert data["urls"] == ["https://example.com", "http://foo.bar"]
+
+    def test_flatten_scrape_options(self):
+        opts = ScrapeOptions(
+            formats=["markdown", "change_tracking", {"type": "screenshot", "full_page": True, "quality": 80}],
+            include_tags=["main"],
+            exclude_tags=["nav"],
+            only_main_content=True,
+            wait_for=500,
+            timeout=30000,
+            mobile=True,
+            parsers=["pdf"],
+            actions=[{"type": "screenshot", "full_page": True}],
+            location=Location(country="us", languages=["en"]),
+            skip_tls_verification=False,
+            remove_base64_images=False,
+            fast_mode=True,
+            use_mock="test",
+            block_ads=False,
+            proxy="basic",
+            max_age=1000,
+            store_in_cache=False,
+        )
+        data = prepare_batch_scrape_request(["https://example.com"], options=opts)
+
+        # Formats should be at top-level as list, with screenshot normalized to object w/ fullPage
+        assert isinstance(data.get("formats"), list)
+        assert "markdown" in data["formats"]
+        # snake_case format should be converted to camelCase
+        assert "changeTracking" in data["formats"]
+        found_obj = next((f for f in data["formats"] if isinstance(f, dict) and f.get("type") == "screenshot"), None)
+        assert found_obj is not None and found_obj.get("fullPage") is True and found_obj.get("quality") == 80
+
+        # Field conversions to camelCase
+        assert data["includeTags"] == ["main"]
+        assert data["excludeTags"] == ["nav"]
+        assert data["onlyMainContent"] is True
+        assert data["waitFor"] == 500
+        assert data["timeout"] == 30000
+        assert data["mobile"] is True
+        assert data["parsers"] == ["pdf"]
+        assert isinstance(data["actions"], list) and data["actions"][0]["type"] == "screenshot"
+        assert isinstance(data["location"], dict) and data["location"]["country"] == "us"
+        assert data["skipTlsVerification"] is False
+        assert data["removeBase64Images"] is False
+        assert data["fastMode"] is True
+        assert data["useMock"] == "test"
+        assert data["blockAds"] is False
+        assert data["proxy"] == "basic"
+        assert data["maxAge"] == 1000
+        assert data["storeInCache"] is False
+
+    def test_batch_specific_fields(self):
+        webhook = WebhookConfig(url="https://hook.test", headers={"X": "Y"}, events=["completed"])
+        data = prepare_batch_scrape_request(
+            ["https://example.com"],
+            webhook=webhook,
+            append_to_id="00000000-0000-0000-0000-000000000000",
+            ignore_invalid_urls=True,
+            max_concurrency=5,
+            zero_data_retention=True,
+            integration="test",
+        )
+        assert isinstance(data["webhook"], dict) and data["webhook"]["url"] == "https://hook.test"
+        assert data["appendToId"] == "00000000-0000-0000-0000-000000000000"
+        assert data["ignoreInvalidURLs"] is True
+        assert data["maxConcurrency"] == 5
+        assert data["zeroDataRetention"] is True
+        assert data["integration"] == "test"
+
+    def test_string_webhook_is_passed_verbatim(self):
+        data = prepare_batch_scrape_request(["https://example.com"], webhook="https://hook.simple")
+        assert data["webhook"] == "https://hook.simple"
@@ -0,0 +1,70 @@
+"""
+Unit tests for crawl params functionality in Firecrawl v2 SDK.
+"""
+
+import pytest
+from firecrawl.v2.types import CrawlParamsRequest, CrawlParamsData
+
+
+class TestCrawlParamsRequest:
+    """Unit tests for CrawlParamsRequest."""
+
+    def test_crawl_params_request_creation(self):
+        """Test creating CrawlParamsRequest with valid data."""
+        request = CrawlParamsRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+
+        assert request.url == "https://example.com"
+        assert request.prompt == "Extract all blog posts"
+
+    def test_crawl_params_request_serialization(self):
+        """Test that CrawlParamsRequest serializes correctly."""
+        request = CrawlParamsRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts and documentation"
+        )
+
+        data = request.model_dump()
+
+        assert data["url"] == "https://example.com"
+        assert data["prompt"] == "Extract all blog posts and documentation"
+
+
+class TestCrawlParamsData:
+    """Unit tests for CrawlParamsData."""
+
+    def test_crawl_params_data_creation(self):
+        """Test creating CrawlParamsData with minimal data."""
+        data = CrawlParamsData()
+
+        assert data.include_paths is None
+        assert data.exclude_paths is None
+        assert data.max_discovery_depth is None
+        assert data.ignore_sitemap is False
+        assert data.limit is None
+        assert data.crawl_entire_domain is False
+        assert data.allow_external_links is False
+        assert data.scrape_options is None
+        assert data.warning is None
+
+    def test_crawl_params_data_with_values(self):
+        """Test creating CrawlParamsData with values."""
+        data = CrawlParamsData(
+            include_paths=["/blog/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=3,
+            limit=50,
+            crawl_entire_domain=True,
+            allow_external_links=False,
+            warning="Test warning"
+        )
+
+        assert data.include_paths == ["/blog/*"]
+        assert data.exclude_paths == ["/admin/*"]
+        assert data.max_discovery_depth == 3
+        assert data.limit == 50
+        assert data.crawl_entire_domain is True
+        assert data.allow_external_links is False
+        assert data.warning == "Test warning"
@@ -0,0 +1,240 @@
+import pytest
+from firecrawl.v2.types import CrawlRequest, ScrapeOptions
+from firecrawl.v2.methods.crawl import _prepare_crawl_request
+
+
+class TestCrawlRequestPreparation:
+    """Unit tests for crawl request preparation."""
+
+    def test_basic_request_preparation(self):
+        """Test basic request preparation with minimal fields."""
+        request = CrawlRequest(url="https://example.com")
+        data = _prepare_crawl_request(request)
+
+        # Check basic fields
+        assert data["url"] == "https://example.com"
+
+        # Check that no options are present
+        assert "limit" not in data
+        assert "prompt" not in data
+
+    def test_crawl_options_conversion(self):
+        """Test that CrawlOptions fields are converted to camelCase."""
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=10,
+            max_discovery_depth=3,
+            ignore_sitemap=True,
+            crawl_entire_domain=False,
+            allow_external_links=True
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check basic field
+        assert data["url"] == "https://example.com"
+
+        # Check snake_case to camelCase conversions
+        assert "limit" in data
+        assert data["limit"] == 10
+        assert "maxDiscoveryDepth" in data
+        assert data["maxDiscoveryDepth"] == 3
+        assert "ignoreSitemap" in data
+        assert data["ignoreSitemap"] is True
+        assert "crawlEntireDomain" in data
+        assert data["crawlEntireDomain"] is False
+        assert "allowExternalLinks" in data
+        assert data["allowExternalLinks"] is True
+
+        # Check that snake_case fields are not present
+        assert "ignore_sitemap" not in data
+        assert "crawl_entire_domain" not in data
+        assert "allow_external_links" not in data
+
+    def test_scrape_options_conversion(self):
+        """Test that nested ScrapeOptions are converted to camelCase."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown", "html"],
+            headers={"User-Agent": "Test"},
+            include_tags=["h1", "h2"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            timeout=15000,
+            wait_for=2000,
+            mobile=True,
+            skip_tls_verification=True,
+            remove_base64_images=False
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        assert "scrapeOptions" in data
+        assert "scrape_options" not in data
+
+        # Check nested conversions
+        scrape_data = data["scrapeOptions"]
+        assert "includeTags" in scrape_data
+        assert scrape_data["includeTags"] == ["h1", "h2"]
+        assert "excludeTags" in scrape_data
+        assert scrape_data["excludeTags"] == ["nav"]
+        assert "onlyMainContent" in scrape_data
+        assert scrape_data["onlyMainContent"] is False
+        assert "waitFor" in scrape_data
+        assert scrape_data["waitFor"] == 2000
+        assert "skipTlsVerification" in scrape_data
+        assert scrape_data["skipTlsVerification"] is True
+        assert "removeBase64Images" in scrape_data
+        assert scrape_data["removeBase64Images"] is False
+
+    def test_all_fields_conversion(self):
+        """Test request preparation with all possible fields."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown"],
+            headers={"User-Agent": "Test"},
+            only_main_content=False,
+            mobile=True
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts and documentation",
+            include_paths=["/blog/*", "/docs/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=3,
+            ignore_sitemap=False,
+            limit=100,
+            crawl_entire_domain=True,
+            allow_external_links=False,
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check basic fields
+        assert data["url"] == "https://example.com"
+        assert data["prompt"] == "Extract all blog posts and documentation"
+
+        # Check all CrawlOptions fields
+        assert "includePaths" in data
+        assert data["includePaths"] == ["/blog/*", "/docs/*"]
+        assert "excludePaths" in data
+        assert data["excludePaths"] == ["/admin/*"]
+        assert "maxDiscoveryDepth" in data
+        assert data["maxDiscoveryDepth"] == 3
+        assert "ignoreSitemap" in data
+        assert data["ignoreSitemap"] is False
+        assert "limit" in data
+        assert data["limit"] == 100
+        assert "crawlEntireDomain" in data
+        assert data["crawlEntireDomain"] is True
+        assert "allowExternalLinks" in data
+        assert data["allowExternalLinks"] is False
+
+        # Check nested scrape options
+        assert "scrapeOptions" in data
+        scrape_data = data["scrapeOptions"]
+        assert "onlyMainContent" in scrape_data
+        assert scrape_data["onlyMainContent"] is False
+        assert "mobile" in scrape_data
+        assert scrape_data["mobile"] is True
+
+    def test_none_values_handling(self):
+        """Test that None values are handled correctly."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt=None,
+            limit=None,
+            scrape_options=None
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Only the required field should be present
+        assert "url" in data
+        assert len(data) == 1  # Only url should be present
+
+    def test_prompt_parameter(self):
+        """Test that prompt parameter is included when provided."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+
+        data = _prepare_crawl_request(request)
+
+        assert "url" in data
+        assert "prompt" in data
+        assert data["prompt"] == "Extract all blog posts"
+
+    def test_empty_options(self):
+        """Test that empty options are handled correctly."""
+        request = CrawlRequest(
+            url="https://example.com"
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Should only have the required url field
+        assert "url" in data
+        assert len(data) == 1  # Only url should be present
+
+    def test_validation_integration(self):
+        """Test that validation is called during preparation."""
+        # This should raise an error due to validation
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url="")
+            _prepare_crawl_request(request)
+
+        # This should raise an error due to validation
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=0
+            )
+            _prepare_crawl_request(request)
+
+    def test_scrape_options_shared_function_integration(self):
+        """Test that the shared prepare_scrape_options function is being used."""
+        # Test with all snake_case fields to ensure conversion
+        scrape_opts = ScrapeOptions(
+            include_tags=["h1", "h2"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            wait_for=2000,
+            skip_tls_verification=True,
+            remove_base64_images=False
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+
+        data = _prepare_crawl_request(request)
+
+        # Check that scrapeOptions is present and converted
+        assert "scrapeOptions" in data
+        scrape_data = data["scrapeOptions"]
+
+        # Check all conversions are working
+        assert "includeTags" in scrape_data
+        assert "excludeTags" in scrape_data
+        assert "onlyMainContent" in scrape_data
+        assert "waitFor" in scrape_data
+        assert "skipTlsVerification" in scrape_data
+        assert "removeBase64Images" in scrape_data
+
+        # Check that snake_case fields are not present
+        assert "include_tags" not in scrape_data
+        assert "exclude_tags" not in scrape_data
+        assert "only_main_content" not in scrape_data
+        assert "wait_for" not in scrape_data
+        assert "skip_tls_verification" not in scrape_data
+        assert "remove_base64_images" not in scrape_data
+        assert "raw_html" not in scrape_data
+        assert "screenshot_full_page" not in scrape_data
@@ -0,0 +1,107 @@
+import pytest
+from firecrawl.v2.types import CrawlRequest, ScrapeOptions
+from firecrawl.v2.methods.crawl import _validate_crawl_request
+
+
+class TestCrawlRequestValidation:
+    """Unit tests for crawl request validation."""
+
+    def test_validate_empty_url(self):
+        """Test validation with empty URL."""
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url="")
+            _validate_crawl_request(request)
+
+    def test_validate_whitespace_url(self):
+        """Test validation with whitespace-only URL."""
+        with pytest.raises(ValueError, match="URL cannot be empty"):
+            request = CrawlRequest(url=" ")
+            _validate_crawl_request(request)
+
+    def test_validate_valid_url(self):
+        """Test validation with valid URL."""
+        request = CrawlRequest(url="https://example.com")
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_invalid_limit(self):
+        """Test validation with invalid limit."""
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=0
+            )
+            _validate_crawl_request(request)
+
+    def test_validate_negative_limit(self):
+        """Test validation with negative limit."""
+        with pytest.raises(ValueError, match="Limit must be positive"):
+            request = CrawlRequest(
+                url="https://example.com",
+                limit=-5
+            )
+            _validate_crawl_request(request)
+
+    def test_validate_valid_limit(self):
+        """Test validation with valid limit."""
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=10
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_with_prompt(self):
+        """Test validation with prompt."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts"
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_with_prompt_and_options(self):
+        """Test validation with prompt and options."""
+        request = CrawlRequest(
+            url="https://example.com",
+            prompt="Extract all blog posts",
+            limit=10
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_none_options(self):
+        """Test validation with None options."""
+        request = CrawlRequest(url="https://example.com")
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_complex_options(self):
+        """Test validation with complex options."""
+        scrape_opts = ScrapeOptions(
+            formats=["markdown"],
+            only_main_content=False,
+            mobile=True
+        )
+
+        request = CrawlRequest(
+            url="https://example.com",
+            limit=50,
+            max_discovery_depth=3,
+            scrape_options=scrape_opts
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+    def test_validate_scrape_options_integration(self):
+        """Test that scrape_options validation is integrated."""
+        # Test with valid scrape options
+        scrape_opts = ScrapeOptions(formats=["markdown"], timeout=30000)
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=scrape_opts
+        )
+        _validate_crawl_request(request)  # Should not raise
+
+        # Test with invalid scrape options (should raise error)
+        invalid_scrape_opts = ScrapeOptions(timeout=-1000)
+        request = CrawlRequest(
+            url="https://example.com",
+            scrape_options=invalid_scrape_opts
+        )
+        with pytest.raises(ValueError, match="Timeout must be positive"):
+            _validate_crawl_request(request)
@@ -0,0 +1,53 @@
+import pytest
+from firecrawl.v2.types import MapOptions
+from firecrawl.v2.methods.map import _prepare_map_request
+
+
+class TestMapRequestPreparation:
+    """Unit tests for map request preparation."""
+
+    def test_basic_request_preparation(self):
+        data = _prepare_map_request("https://example.com")
+        assert data["url"] == "https://example.com"
+        # Default sitemap handling should be "include" when no flags provided
+        assert "sitemap" not in data  # we only send when options provided
+
+    def test_sitemap_transformations(self):
+        # sitemap -> "only"
+        opts = MapOptions(sitemap="only")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "only"
+
+        # sitemap -> "skip"
+        opts = MapOptions(sitemap="skip")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "skip"
+
+        # default when options present but sitemap left as default -> include
+        opts = MapOptions(search="docs")
+        data = _prepare_map_request("https://example.com", opts)
+        assert data["sitemap"] == "include"
+
+    def test_field_conversions(self):
+        opts = MapOptions(
+            search="docs",
+            include_subdomains=True,
+            limit=25,
+            sitemap="only",
+            timeout=15000,
+        )
+        data = _prepare_map_request("https://example.com", opts)
+
+        assert data["url"] == "https://example.com"
+        assert data["search"] == "docs"
+        assert data["includeSubdomains"] is True
+        assert data["limit"] == 25
+        assert data["sitemap"] == "only"
+        assert data["timeout"] == 15000
+
+    def test_invalid_url(self):
+        with pytest.raises(ValueError):
+            _prepare_map_request("")
+        with pytest.raises(ValueError):
+            _prepare_map_request(" ")
+
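For completeness, a small sketch of how the map preparation helper exercised above might be driven directly; it assumes only the `_prepare_map_request` signature and the `MapOptions` fields used in these tests:

```python
from firecrawl.v2.types import MapOptions
from firecrawl.v2.methods.map import _prepare_map_request

# Mirrors test_field_conversions: snake_case options become camelCase keys,
# and the sitemap mode is passed through as a string ("only" / "skip" / "include").
payload = _prepare_map_request(
    "https://example.com",
    MapOptions(search="docs", include_subdomains=True, limit=25, sitemap="only", timeout=15000),
)
print(payload)  # e.g. {"url": ..., "search": "docs", "includeSubdomains": True, ...}
```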