firecrawl 2.16.5 → 3.0.3 (py3-none-any.whl)
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Note: the registry diff tool flags this release as potentially problematic.
- firecrawl/__init__.py +27 -19
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
- firecrawl/__tests__/e2e/v2/test_map.py +60 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
- firecrawl/__tests__/e2e/v2/test_search.py +265 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +241 -0
- firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
- firecrawl/types.py +157 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +4653 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +802 -0
- firecrawl/v2/client_async.py +250 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/batch.py +85 -0
- firecrawl/v2/methods/aio/crawl.py +174 -0
- firecrawl/v2/methods/aio/extract.py +126 -0
- firecrawl/v2/methods/aio/map.py +59 -0
- firecrawl/v2/methods/aio/scrape.py +36 -0
- firecrawl/v2/methods/aio/search.py +58 -0
- firecrawl/v2/methods/aio/usage.py +42 -0
- firecrawl/v2/methods/batch.py +420 -0
- firecrawl/v2/methods/crawl.py +468 -0
- firecrawl/v2/methods/extract.py +131 -0
- firecrawl/v2/methods/map.py +77 -0
- firecrawl/v2/methods/scrape.py +68 -0
- firecrawl/v2/methods/search.py +173 -0
- firecrawl/v2/methods/usage.py +41 -0
- firecrawl/v2/types.py +546 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +153 -0
- firecrawl/v2/utils/http_client_async.py +64 -0
- firecrawl/v2/utils/validation.py +324 -0
- firecrawl/v2/watcher.py +312 -0
- firecrawl/v2/watcher_async.py +245 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
- firecrawl-3.0.3.dist-info/RECORD +78 -0
- tests/test_timeout_conversion.py +117 -0
- firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/e2e_withAuth/test.py +0 -170
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
- firecrawl-2.16.5.dist-info/RECORD +0 -12
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
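
The unit-test hunks reproduced below import straight from the new firecrawl.v2 module tree added in this release. As a quick orientation, here is a minimal sketch of that layout in use, built only from the imports and assertions that appear in the tests below (it is not a summary of the full 3.0.3 API):

# Orientation sketch only: imports are taken from the test files shown below;
# the expected payload shape is inferred from their assertions.
from firecrawl.v2.types import ScrapeOptions
from firecrawl.v2.methods.scrape import _prepare_scrape_request

options = ScrapeOptions(only_main_content=False, wait_for=2000)
payload = _prepare_scrape_request("https://example.com", options)
# Per the tests: snake_case option fields come back camelCased, e.g.
# {"url": "https://example.com", "onlyMainContent": False, "waitFor": 2000, ...}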
firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py
@@ -0,0 +1,92 @@
import pytest
from firecrawl.v2.types import ScrapeOptions, Viewport, ScreenshotAction
from firecrawl.v2.methods.scrape import _prepare_scrape_request


class TestScrapeRequestPreparation:
    """Unit tests for scrape request preparation."""

    def test_basic_request_preparation(self):
        """Test basic request preparation with minimal fields."""
        data = _prepare_scrape_request("https://example.com")

        # Check basic fields
        assert data["url"] == "https://example.com"

        # Check that no options are present
        assert "formats" not in data
        assert "headers" not in data

    def test_scrape_options_conversion(self):
        """Test that ScrapeOptions fields are converted to camelCase."""
        options = ScrapeOptions(
            formats=["markdown", "html"],
            headers={"User-Agent": "Test"},
            include_tags=["h1", "h2"],
            exclude_tags=["nav"],
            only_main_content=False,
            timeout=15000,
            wait_for=2000,
            mobile=True,
            skip_tls_verification=True,
            remove_base64_images=False
        )

        data = _prepare_scrape_request("https://example.com", options)

        # Check basic field
        assert data["url"] == "https://example.com"

        # Check snake_case to camelCase conversions
        assert "includeTags" in data
        assert data["includeTags"] == ["h1", "h2"]
        assert "excludeTags" in data
        assert data["excludeTags"] == ["nav"]
        assert "onlyMainContent" in data
        assert data["onlyMainContent"] is False
        assert "waitFor" in data
        assert data["waitFor"] == 2000
        assert "skipTlsVerification" in data
        assert data["skipTlsVerification"] is True
        assert "removeBase64Images" in data
        assert data["removeBase64Images"] is False

        # Check that snake_case fields are not present
        assert "include_tags" not in data
        assert "exclude_tags" not in data
        assert "only_main_content" not in data

    def test_actions_conversion(self):
        """Test that actions are converted to camelCase."""
        viewport = Viewport(width=800, height=600)
        action = ScreenshotAction(full_page=False, quality=80, viewport=viewport)

        options = ScrapeOptions(actions=[action])
        data = _prepare_scrape_request("https://example.com", options)

        assert "actions" in data
        assert len(data["actions"]) == 1

        action_data = data["actions"][0]
        assert action_data["type"] == "screenshot"
        assert action_data["fullPage"] is False
        assert action_data["quality"] == 80
        assert "viewport" in action_data

    def test_none_options_handling(self):
        """Test handling of None options."""
        data = _prepare_scrape_request("https://example.com", None)

        assert data["url"] == "https://example.com"
        # Should not have any option fields
        assert len(data) == 1

    def test_empty_url_validation(self):
        """Test validation with empty URL."""
        with pytest.raises(ValueError, match="URL cannot be empty"):
            _prepare_scrape_request("")

    def test_whitespace_url_validation(self):
        """Test validation with whitespace-only URL."""
        with pytest.raises(ValueError, match="URL cannot be empty"):
            _prepare_scrape_request(" ")
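
These assertions pin down the conversion contract without showing the helper itself. Purely as an illustration of that contract, a hypothetical stand-in for _prepare_scrape_request could look like the sketch below (inferred from the tests above only; the shipped helper in firecrawl/v2/methods/scrape.py may differ, e.g. in how nested actions are serialized):

from typing import Any, Dict, Optional

def _snake_to_camel(name: str) -> str:
    # "skip_tls_verification" -> "skipTlsVerification"
    head, *rest = name.split("_")
    return head + "".join(part.capitalize() for part in rest)

def prepare_scrape_request_sketch(url: str, options: Optional[Any] = None) -> Dict[str, Any]:
    # Hypothetical re-implementation for illustration; not the library code.
    if not url or not url.strip():
        raise ValueError("URL cannot be empty")
    data: Dict[str, Any] = {"url": url}
    if options is not None:
        for key, value in options.model_dump(exclude_none=True).items():
            data[_snake_to_camel(key)] = value
    return data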
firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py
@@ -0,0 +1,167 @@
import pytest
from firecrawl.v2.types import SearchRequest, ScrapeOptions, Source
from firecrawl.v2.methods.search import _prepare_search_request


class TestSearchRequestPreparation:
    """Unit tests for search request preparation."""

    def test_basic_request_preparation(self):
        """Test basic request preparation with minimal fields."""
        request = SearchRequest(query="test query")
        data = _prepare_search_request(request)

        # Check basic fields
        assert data["query"] == "test query"
        assert data["limit"] == 5
        assert data["timeout"] == 60000

        # Check that snake_case fields are not present
        assert "ignore_invalid_urls" not in data
        assert "scrape_options" not in data

    def test_all_fields_conversion(self):
        """Test request preparation with all possible fields."""
        scrape_opts = ScrapeOptions(
            formats=["markdown"],
            headers={"User-Agent": "Test"},
            include_tags=["h1", "h2"],
            exclude_tags=["nav"],
            only_main_content=False,
            timeout=15000,
            wait_for=2000,
            mobile=True,
            skip_tls_verification=True,
            remove_base64_images=False
        )

        request = SearchRequest(
            query="test query",
            sources=["web", "news"],
            limit=10,
            tbs="qdr:w",
            location="US",
            ignore_invalid_urls=False,
            timeout=30000,
            scrape_options=scrape_opts
        )

        data = _prepare_search_request(request)

        # Check all basic fields
        assert data["query"] == "test query"
        assert data["limit"] == 10
        assert data["tbs"] == "qdr:w"
        assert data["location"] == "US"
        assert data["timeout"] == 30000

        # Check snake_case to camelCase conversions
        assert "ignoreInvalidURLs" in data
        assert data["ignoreInvalidURLs"] is False
        assert "ignore_invalid_urls" not in data

        assert "scrapeOptions" in data
        assert "scrape_options" not in data

        # Check sources
        assert "sources" in data
        assert len(data["sources"]) == 2
        assert data["sources"][0]["type"] == "web"
        assert data["sources"][1]["type"] == "news"

        # Check nested scrape options conversions
        scrape_data = data["scrapeOptions"]
        assert "includeTags" in scrape_data
        assert scrape_data["includeTags"] == ["h1", "h2"]
        assert "excludeTags" in scrape_data
        assert scrape_data["excludeTags"] == ["nav"]
        assert "onlyMainContent" in scrape_data
        assert scrape_data["onlyMainContent"] is False
        assert "waitFor" in scrape_data
        assert scrape_data["waitFor"] == 2000
        assert "skipTlsVerification" in scrape_data
        assert scrape_data["skipTlsVerification"] is True
        assert "removeBase64Images" in scrape_data
        assert scrape_data["removeBase64Images"] is False

    def test_exclude_none_behavior(self):
        """Test that exclude_none=True behavior is working."""
        request = SearchRequest(
            query="test",
            sources=None,
            limit=None,
            tbs=None,
            location=None,
            ignore_invalid_urls=None,
            timeout=None,
            scrape_options=None
        )

        data = _prepare_search_request(request)

        # When limit and timeout are explicitly None, they should be excluded
        assert "query" in data
        assert len(data) == 1  # Only query should be present

    def test_empty_scrape_options(self):
        """Test that empty scrape options are handled correctly."""
        scrape_opts = ScrapeOptions()  # All defaults

        request = SearchRequest(
            query="test",
            scrape_options=scrape_opts
        )

        data = _prepare_search_request(request)

        assert "scrapeOptions" in data
        scrape_data = data["scrapeOptions"]

        # Should have default values
        assert "onlyMainContent" in scrape_data
        assert scrape_data["onlyMainContent"] is True
        assert "mobile" in scrape_data
        assert scrape_data["mobile"] is False

    def test_scrape_options_shared_function_integration(self):
        """Test that the shared prepare_scrape_options function is being used."""
        # Test with all snake_case fields to ensure conversion
        scrape_opts = ScrapeOptions(
            formats=["markdown", "rawHtml"],
            include_tags=["h1", "h2"],
            exclude_tags=["nav"],
            only_main_content=False,
            wait_for=2000,
            skip_tls_verification=True,
            remove_base64_images=False
            # Note: raw_html should be in formats array, not as a separate field
        )

        request = SearchRequest(
            query="test",
            scrape_options=scrape_opts
        )

        data = _prepare_search_request(request)

        # Check that scrapeOptions is present and converted
        assert "scrapeOptions" in data
        scrape_data = data["scrapeOptions"]

        # Check all conversions are working
        assert "formats" in scrape_data
        assert scrape_data["formats"] == ["markdown", "rawHtml"]
        assert "includeTags" in scrape_data
        assert "excludeTags" in scrape_data
        assert "onlyMainContent" in scrape_data
        assert "waitFor" in scrape_data
        assert "skipTlsVerification" in scrape_data
        assert "removeBase64Images" in scrape_data

        # Check that snake_case fields are not present
        assert "include_tags" not in scrape_data
        assert "exclude_tags" not in scrape_data
        assert "only_main_content" not in scrape_data
        assert "wait_for" not in scrape_data
        assert "skip_tls_verification" not in scrape_data
        assert "remove_base64_images" not in scrape_data
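
One detail worth noting in the assertions above: sources passed as plain strings ("web", "news") end up in the payload as objects keyed by type. A hedged sketch of that normalization step, written only to make the expected shape concrete (not the actual code in firecrawl/v2/methods/search.py):

from typing import Any, Dict, List, Union

def normalize_sources_sketch(sources: List[Union[str, Dict[str, Any], Any]]) -> List[Dict[str, Any]]:
    # Illustration only: mirror the shape the tests assert for data["sources"].
    normalized: List[Dict[str, Any]] = []
    for source in sources:
        if isinstance(source, str):
            normalized.append({"type": source})
        elif isinstance(source, dict):
            normalized.append(source)
        else:
            # e.g. a Source pydantic model from firecrawl.v2.types
            normalized.append(source.model_dump(exclude_none=True))
    return normalized

assert normalize_sources_sketch(["web", "news"]) == [{"type": "web"}, {"type": "news"}]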
firecrawl/__tests__/unit/v2/methods/test_search_validation.py
@@ -0,0 +1,206 @@
import pytest
from firecrawl.v2.types import SearchRequest, Source, ScrapeOptions, ScrapeFormats
from firecrawl.v2.methods.search import _validate_search_request


class TestSearchValidation:
    """Unit tests for search request validation."""

    def test_validate_empty_query(self):
        """Test validation of empty query."""
        request = SearchRequest(query="")
        with pytest.raises(ValueError, match="Query cannot be empty"):
            _validate_search_request(request)

        request = SearchRequest(query=" ")
        with pytest.raises(ValueError, match="Query cannot be empty"):
            _validate_search_request(request)

    def test_validate_invalid_limit(self):
        """Test validation of invalid limits."""
        # Zero limit
        request = SearchRequest(query="test", limit=0)
        with pytest.raises(ValueError, match="Limit must be positive"):
            _validate_search_request(request)

        # Negative limit
        request = SearchRequest(query="test", limit=-1)
        with pytest.raises(ValueError, match="Limit must be positive"):
            _validate_search_request(request)

        # Too high limit
        request = SearchRequest(query="test", limit=101)
        with pytest.raises(ValueError, match="Limit cannot exceed 100"):
            _validate_search_request(request)

    def test_validate_invalid_timeout(self):
        """Test validation of invalid timeouts."""
        # Zero timeout
        request = SearchRequest(query="test", timeout=0)
        with pytest.raises(ValueError, match="Timeout must be positive"):
            _validate_search_request(request)

        # Negative timeout
        request = SearchRequest(query="test", timeout=-1000)
        with pytest.raises(ValueError, match="Timeout must be positive"):
            _validate_search_request(request)

        # Too high timeout
        request = SearchRequest(query="test", timeout=300001)
        with pytest.raises(ValueError, match="Timeout cannot exceed 300000ms"):
            _validate_search_request(request)

    def test_validate_invalid_sources(self):
        """Test validation of invalid sources."""
        # Invalid string source
        request = SearchRequest(query="test", sources=["invalid_source"])
        with pytest.raises(ValueError, match="Invalid source type"):
            _validate_search_request(request)

        # Invalid object source
        request = SearchRequest(query="test", sources=[Source(type="invalid_source")])
        with pytest.raises(ValueError, match="Invalid source type"):
            _validate_search_request(request)

        # Mixed valid/invalid sources
        request = SearchRequest(query="test", sources=["web", "invalid_source"])
        with pytest.raises(ValueError, match="Invalid source type"):
            _validate_search_request(request)

    def test_validate_invalid_location(self):
        """Test validation of invalid location."""
        # Empty location
        request = SearchRequest(query="test", location="")
        with pytest.raises(ValueError, match="Location must be a non-empty string"):
            _validate_search_request(request)

        # Whitespace location
        request = SearchRequest(query="test", location=" ")
        with pytest.raises(ValueError, match="Location must be a non-empty string"):
            _validate_search_request(request)

    def test_validate_invalid_tbs(self):
        """Test validation of invalid tbs values."""
        invalid_tbs_values = ["invalid", "qdr:x", "yesterday", "last_week"]

        for invalid_tbs in invalid_tbs_values:
            request = SearchRequest(query="test", tbs=invalid_tbs)
            with pytest.raises(ValueError, match="Invalid tbs value"):
                _validate_search_request(request)

    def test_validate_valid_requests(self):
        """Test that valid requests pass validation."""
        # Minimal valid request
        request = SearchRequest(query="test")
        validated = _validate_search_request(request)
        assert validated == request

        # Request with all optional parameters
        request = SearchRequest(
            query="test query",
            sources=["web", "news"],
            limit=10,
            tbs="qdr:w",
            location="US",
            ignore_invalid_urls=False,
            timeout=30000
        )
        validated = _validate_search_request(request)
        assert validated == request

        # Request with object sources
        request = SearchRequest(
            query="test",
            sources=[Source(type="web"), Source(type="images")]
        )
        validated = _validate_search_request(request)
        assert validated == request

    def test_validate_edge_cases(self):
        """Test edge cases and boundary values."""
        # Maximum valid limit
        request = SearchRequest(query="test", limit=100)
        validated = _validate_search_request(request)
        assert validated == request

        # Maximum valid timeout
        request = SearchRequest(query="test", timeout=300000)
        validated = _validate_search_request(request)
        assert validated == request

        # Minimum valid limit
        request = SearchRequest(query="test", limit=1)
        validated = _validate_search_request(request)
        assert validated == request

        # Minimum valid timeout
        request = SearchRequest(query="test", timeout=1)
        validated = _validate_search_request(request)
        assert validated == request

    def test_validate_none_values(self):
        """Test that None values for optional fields are handled correctly."""
        request = SearchRequest(
            query="test",
            sources=None,
            limit=None,
            tbs=None,
            location=None,
            ignore_invalid_urls=None,
            timeout=None
        )
        validated = _validate_search_request(request)
        assert validated == request

    def test_validate_scrape_options_integration(self):
        """Test that scrape_options validation is integrated."""
        # Test with valid scrape options
        scrape_opts = ScrapeOptions(formats=["markdown"], timeout=30000)
        request = SearchRequest(query="test", scrape_options=scrape_opts)
        validated = _validate_search_request(request)
        assert validated == request

        # Test with invalid scrape options (should raise error)
        invalid_scrape_opts = ScrapeOptions(timeout=-1000)
        request = SearchRequest(query="test", scrape_options=invalid_scrape_opts)
        with pytest.raises(ValueError, match="Timeout must be positive"):
            _validate_search_request(request)


class TestSearchRequestModel:
    """Unit tests for SearchRequest model behavior."""

    def test_default_values(self):
        """Test that default values are set correctly."""
        request = SearchRequest(query="test")
        assert request.limit == 5
        assert request.ignore_invalid_urls is None  # No default in model
        assert request.timeout == 60000
        assert request.sources is None
        assert request.tbs is None
        assert request.location is None
        assert request.scrape_options is None

    def test_field_aliases(self):
        """Test that field aliases work correctly for API serialization."""
        # Test with None value (no default)
        request1 = SearchRequest(query="test")
        data1 = request1.model_dump(by_alias=True)
        assert "ignore_invalid_urls" in data1  # No alias, uses snake_case
        assert data1["ignore_invalid_urls"] is None

        # Test with explicit False value
        request2 = SearchRequest(
            query="test",
            ignore_invalid_urls=False,
            scrape_options=ScrapeOptions(formats=["markdown"])
        )

        # Check that aliases are used in model_dump with by_alias=True
        data2 = request2.model_dump(by_alias=True)
        assert "ignore_invalid_urls" in data2  # No alias, uses snake_case
        assert "scrape_options" in data2  # No alias, uses snake_case
        assert data2["ignore_invalid_urls"] is False
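
The boundary cases above fix the numeric limits precisely: limits from 1 to 100 and timeouts from 1 to 300000 ms pass, anything outside raises. A minimal sketch of checks consistent with those tests (illustrative only; the real _validate_search_request also covers query, sources, location, tbs, and nested scrape options):

from typing import Optional

def check_search_bounds_sketch(limit: Optional[int] = None, timeout: Optional[int] = None) -> None:
    # Boundary rules taken from the error messages asserted in the tests above.
    if limit is not None:
        if limit <= 0:
            raise ValueError("Limit must be positive")
        if limit > 100:
            raise ValueError("Limit cannot exceed 100")
    if timeout is not None:
        if timeout <= 0:
            raise ValueError("Timeout must be positive")
        if timeout > 300000:
            raise ValueError("Timeout cannot exceed 300000ms")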
firecrawl/__tests__/unit/v2/methods/test_usage_types.py
@@ -0,0 +1,18 @@
from firecrawl.v2.types import ConcurrencyCheck, CreditUsage, TokenUsage


class TestUsageTypes:
    def test_concurrency_check_model(self):
        cc = ConcurrencyCheck(concurrency=3, max_concurrency=10)
        assert cc.concurrency == 3
        assert cc.max_concurrency == 10

    def test_credit_usage_model(self):
        cu = CreditUsage(remaining_credits=123)
        assert isinstance(cu.remaining_credits, int)
        assert cu.remaining_credits == 123

    def test_token_usage_model(self):
        tu = TokenUsage(remaining_tokens=10)
        assert tu.remaining_tokens == 10
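
The usage tests only touch a few integer fields. Equivalent model definitions consistent with those assertions would look roughly like the sketch below (field names taken from the tests; the real models in firecrawl/v2/types.py may carry additional fields):

from pydantic import BaseModel

class ConcurrencyCheckSketch(BaseModel):
    # Hypothetical mirror of ConcurrencyCheck, based only on the fields asserted above.
    concurrency: int
    max_concurrency: int

class CreditUsageSketch(BaseModel):
    remaining_credits: int

class TokenUsageSketch(BaseModel):
    remaining_tokens: int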
firecrawl/__tests__/unit/v2/methods/test_webhook.py
@@ -0,0 +1,123 @@
"""
Unit tests for webhook functionality in Firecrawl v2 SDK.
"""

import pytest
from firecrawl.v2.types import WebhookConfig, CrawlRequest
from firecrawl.v2.methods.crawl import _prepare_crawl_request

class TestWebhookConfig:
    """Test WebhookConfig class functionality."""

    def test_webhook_config_creation_minimal(self):
        """Test creating WebhookConfig with minimal parameters."""
        webhook = WebhookConfig(url="https://example.com/webhook")
        assert webhook.url == "https://example.com/webhook"
        assert webhook.headers is None
        assert webhook.metadata is None
        assert webhook.events is None

    def test_webhook_config_creation_full(self):
        """Test creating WebhookConfig with all parameters."""
        webhook = WebhookConfig(
            url="https://example.com/webhook",
            headers={"Authorization": "Bearer token"},
            metadata={"project": "test"},
            events=["completed", "failed"]
        )
        assert webhook.url == "https://example.com/webhook"
        assert webhook.headers == {"Authorization": "Bearer token"}
        assert webhook.metadata == {"project": "test"}
        assert webhook.events == ["completed", "failed"]

    def test_webhook_config_validation(self):
        """Test WebhookConfig validation."""
        # URL is required
        with pytest.raises(Exception):  # Pydantic validation error
            WebhookConfig()


class TestCrawlRequestWebhook:
    """Test CrawlRequest webhook functionality."""

    def test_crawl_request_with_string_webhook(self):
        """Test CrawlRequest with string webhook."""
        request = CrawlRequest(
            url="https://example.com",
            webhook="https://example.com/webhook"
        )

        data = _prepare_crawl_request(request)
        assert data["webhook"] == "https://example.com/webhook"

    def test_crawl_request_with_webhook_config(self):
        """Test CrawlRequest with WebhookConfig object."""
        webhook_config = WebhookConfig(
            url="https://example.com/webhook",
            headers={"Authorization": "Bearer token"},
            events=["completed"]
        )

        request = CrawlRequest(
            url="https://example.com",
            webhook=webhook_config
        )

        data = _prepare_crawl_request(request)
        assert data["webhook"]["url"] == "https://example.com/webhook"
        assert data["webhook"]["headers"] == {"Authorization": "Bearer token"}
        assert data["webhook"]["events"] == ["completed"]

    def test_crawl_request_without_webhook(self):
        """Test CrawlRequest without webhook."""
        request = CrawlRequest(url="https://example.com")

        data = _prepare_crawl_request(request)
        assert "webhook" not in data

    def test_crawl_request_webhook_serialization(self):
        """Test that webhook config is properly serialized."""
        webhook_config = WebhookConfig(
            url="https://example.com/webhook",
            headers={"Content-Type": "application/json"},
            metadata={"test": "value"},
            events=["page", "completed"]
        )

        request = CrawlRequest(
            url="https://example.com",
            webhook=webhook_config
        )

        data = _prepare_crawl_request(request)
        webhook_data = data["webhook"]

        # Check that all fields are properly serialized
        assert webhook_data["url"] == "https://example.com/webhook"
        assert webhook_data["headers"] == {"Content-Type": "application/json"}
        assert webhook_data["metadata"] == {"test": "value"}
        assert webhook_data["events"] == ["page", "completed"]

    def test_crawl_request_webhook_with_none_values(self):
        """Test webhook config with None values are excluded from serialization."""
        webhook_config = WebhookConfig(
            url="https://example.com/webhook",
            headers=None,
            metadata=None,
            events=None
        )

        request = CrawlRequest(
            url="https://example.com",
            webhook=webhook_config
        )

        data = _prepare_crawl_request(request)
        webhook_data = data["webhook"]

        # Only url should be present, None values should be excluded
        assert webhook_data["url"] == "https://example.com/webhook"
        assert "headers" not in webhook_data
        assert "metadata" not in webhook_data
        assert "events" not in webhook_data
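
Taken together, the webhook tests show three behaviours: a plain URL string is forwarded untouched, a WebhookConfig is serialized with its None fields dropped, and no webhook key is emitted when none was given. A sketch of that branch, for illustration only (not the shipped _prepare_crawl_request):

from typing import Any, Dict, Union

def serialize_webhook_sketch(webhook: Union[str, Any, None]) -> Dict[str, Any]:
    # Illustration of the behaviour the tests above assert.
    data: Dict[str, Any] = {}
    if webhook is None:
        return data                       # no "webhook" key at all
    if isinstance(webhook, str):
        data["webhook"] = webhook         # bare URL passes through
    else:
        # e.g. a WebhookConfig pydantic model: drop None fields
        data["webhook"] = webhook.model_dump(exclude_none=True)
    return data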