firecrawl-4.12.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py
@@ -0,0 +1,109 @@
+ import pytest
+ from firecrawl.v2.types import ScrapeOptions, Viewport, ScreenshotAction
+ from firecrawl.v2.methods.scrape import _prepare_scrape_request
+
+
+ class TestScrapeRequestPreparation:
+     """Unit tests for scrape request preparation."""
+
+     def test_basic_request_preparation(self):
+         """Test basic request preparation with minimal fields."""
+         data = _prepare_scrape_request("https://example.com")
+
+         # Check basic fields
+         assert data["url"] == "https://example.com"
+
+         # Check that no options are present
+         assert "formats" not in data
+         assert "headers" not in data
+
+     def test_scrape_options_conversion(self):
+         """Test that ScrapeOptions fields are converted to camelCase."""
+         options = ScrapeOptions(
+             formats=["markdown", "html"],
+             headers={"User-Agent": "Test"},
+             include_tags=["h1", "h2"],
+             exclude_tags=["nav"],
+             only_main_content=False,
+             timeout=15000,
+             wait_for=2000,
+             mobile=True,
+             skip_tls_verification=True,
+             remove_base64_images=False
+         )
+
+         data = _prepare_scrape_request("https://example.com", options)
+
+         # Check basic field
+         assert data["url"] == "https://example.com"
+
+         # Check snake_case to camelCase conversions
+         assert "includeTags" in data
+         assert data["includeTags"] == ["h1", "h2"]
+         assert "excludeTags" in data
+         assert data["excludeTags"] == ["nav"]
+         assert "onlyMainContent" in data
+         assert data["onlyMainContent"] is False
+         assert "waitFor" in data
+         assert data["waitFor"] == 2000
+         assert "skipTlsVerification" in data
+         assert data["skipTlsVerification"] is True
+         assert "removeBase64Images" in data
+         assert data["removeBase64Images"] is False
+
+         # Check that snake_case fields are not present
+         assert "include_tags" not in data
+         assert "exclude_tags" not in data
+         assert "only_main_content" not in data
+
+     def test_actions_conversion(self):
+         """Test that actions are converted to camelCase."""
+         viewport = Viewport(width=800, height=600)
+         action = ScreenshotAction(full_page=False, quality=80, viewport=viewport)
+
+         options = ScrapeOptions(actions=[action])
+         data = _prepare_scrape_request("https://example.com", options)
+
+         assert "actions" in data
+         assert len(data["actions"]) == 1
+
+         action_data = data["actions"][0]
+         assert action_data["type"] == "screenshot"
+         assert action_data["fullPage"] is False
+         assert action_data["quality"] == 80
+         assert "viewport" in action_data
+
+     def test_none_options_handling(self):
+         """Test handling of None options."""
+         data = _prepare_scrape_request("https://example.com", None)
+
+         assert data["url"] == "https://example.com"
+         # Should not have any option fields
+         assert len(data) == 1
+
+     def test_empty_url_validation(self):
+         """Test validation with empty URL."""
+         with pytest.raises(ValueError, match="URL cannot be empty"):
+             _prepare_scrape_request("")
+
+     def test_whitespace_url_validation(self):
+         """Test validation with whitespace-only URL."""
+         with pytest.raises(ValueError, match="URL cannot be empty"):
+             _prepare_scrape_request(" ")
+
+     def test_all_params_including_integration(self):
+         opts = ScrapeOptions(
+             formats=["markdown"],
+             headers={"User-Agent": "Test"},
+             include_tags=["h1"],
+             exclude_tags=["nav"],
+             only_main_content=False,
+             timeout=15000,
+             wait_for=2000,
+             mobile=True,
+             skip_tls_verification=True,
+             remove_base64_images=False,
+             integration=" _unit-test ",
+         )
+         data = _prepare_scrape_request("https://example.com", opts)
+         assert data["integration"] == "_unit-test"
firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py
@@ -0,0 +1,169 @@
+ import pytest
+ from firecrawl.v2.types import SearchRequest, ScrapeOptions, Source
+ from firecrawl.v2.methods.search import _prepare_search_request
+
+
+ class TestSearchRequestPreparation:
+     """Unit tests for search request preparation."""
+
+     def test_basic_request_preparation(self):
+         """Test basic request preparation with minimal fields."""
+         request = SearchRequest(query="test query")
+         data = _prepare_search_request(request)
+
+         # Check basic fields
+         assert data["query"] == "test query"
+         assert data["limit"] == 5
+         assert data["timeout"] == 300000
+
+         # Check that snake_case fields are not present
+         assert "ignore_invalid_urls" not in data
+         assert "scrape_options" not in data
+
+     def test_all_fields_conversion(self):
+         """Test request preparation with all possible fields."""
+         scrape_opts = ScrapeOptions(
+             formats=["markdown"],
+             headers={"User-Agent": "Test"},
+             include_tags=["h1", "h2"],
+             exclude_tags=["nav"],
+             only_main_content=False,
+             timeout=15000,
+             wait_for=2000,
+             mobile=True,
+             skip_tls_verification=True,
+             remove_base64_images=False
+         )
+
+         request = SearchRequest(
+             query="test query",
+             sources=["web", "news"],
+             limit=10,
+             tbs="qdr:w",
+             location="US",
+             ignore_invalid_urls=False,
+             timeout=30000,
+             scrape_options=scrape_opts,
+             integration=" _e2e-test ",
+         )
+
+         data = _prepare_search_request(request)
+
+         # Check all basic fields
+         assert data["query"] == "test query"
+         assert data["limit"] == 10
+         assert data["tbs"] == "qdr:w"
+         assert data["location"] == "US"
+         assert data["timeout"] == 30000
+
+         # Check snake_case to camelCase conversions
+         assert "ignoreInvalidURLs" in data
+         assert data["ignoreInvalidURLs"] is False
+         assert "ignore_invalid_urls" not in data
+
+         assert "scrapeOptions" in data
+         assert "scrape_options" not in data
+
+         # Check sources
+         assert "sources" in data
+         assert len(data["sources"]) == 2
+         assert data["sources"][0]["type"] == "web"
+         assert data["sources"][1]["type"] == "news"
+
+         # Check nested scrape options conversions
+         scrape_data = data["scrapeOptions"]
+         assert "includeTags" in scrape_data
+         assert scrape_data["includeTags"] == ["h1", "h2"]
+         assert "excludeTags" in scrape_data
+         assert scrape_data["excludeTags"] == ["nav"]
+         assert "onlyMainContent" in scrape_data
+         assert scrape_data["onlyMainContent"] is False
+         assert "waitFor" in scrape_data
+         assert scrape_data["waitFor"] == 2000
+         assert "skipTlsVerification" in scrape_data
+         assert scrape_data["skipTlsVerification"] is True
+         assert "removeBase64Images" in scrape_data
+         assert scrape_data["removeBase64Images"] is False
+         assert data["integration"] == "_e2e-test"
+
+     def test_exclude_none_behavior(self):
+         """Test that exclude_none=True behavior is working."""
+         request = SearchRequest(
+             query="test",
+             sources=None,
+             limit=None,
+             tbs=None,
+             location=None,
+             ignore_invalid_urls=None,
+             timeout=None,
+             scrape_options=None
+         )
+
+         data = _prepare_search_request(request)
+
+         # When limit and timeout are explicitly None, they should be excluded
+         assert "query" in data
+         assert len(data) == 1  # Only query should be present
+
+     def test_empty_scrape_options(self):
+         """Test that empty scrape options are handled correctly."""
+         scrape_opts = ScrapeOptions()  # All defaults
+
+         request = SearchRequest(
+             query="test",
+             scrape_options=scrape_opts
+         )
+
+         data = _prepare_search_request(request)
+
+         assert "scrapeOptions" in data
+         scrape_data = data["scrapeOptions"]
+
+         # Should have default values
+         assert "onlyMainContent" in scrape_data
+         assert scrape_data["onlyMainContent"] is True
+         assert "mobile" in scrape_data
+         assert scrape_data["mobile"] is False
+
+     def test_scrape_options_shared_function_integration(self):
+         """Test that the shared prepare_scrape_options function is being used."""
+         # Test with all snake_case fields to ensure conversion
+         scrape_opts = ScrapeOptions(
+             formats=["markdown", "rawHtml"],
+             include_tags=["h1", "h2"],
+             exclude_tags=["nav"],
+             only_main_content=False,
+             wait_for=2000,
+             skip_tls_verification=True,
+             remove_base64_images=False
+             # Note: raw_html should be in formats array, not as a separate field
+         )
+
+         request = SearchRequest(
+             query="test",
+             scrape_options=scrape_opts
+         )
+
+         data = _prepare_search_request(request)
+
+         # Check that scrapeOptions is present and converted
+         assert "scrapeOptions" in data
+         scrape_data = data["scrapeOptions"]
+
+         # Check all conversions are working
+         assert "formats" in scrape_data
+         assert scrape_data["formats"] == ["markdown", "rawHtml"]
+         assert "includeTags" in scrape_data
+         assert "excludeTags" in scrape_data
+         assert "onlyMainContent" in scrape_data
+         assert "waitFor" in scrape_data
+         assert "skipTlsVerification" in scrape_data
+         assert "removeBase64Images" in scrape_data
+
+         # Check that snake_case fields are not present
+         assert "include_tags" not in scrape_data
+         assert "exclude_tags" not in scrape_data
+         assert "only_main_content" not in scrape_data
+         assert "wait_for" not in scrape_data
+         assert "skip_tls_verification" not in scrape_data
+         assert "remove_base64_images" not in scrape_data
firecrawl/__tests__/unit/v2/methods/test_search_validation.py
@@ -0,0 +1,236 @@
+ import pytest
+ from firecrawl.v2.types import SearchRequest, Source, ScrapeOptions, ScrapeFormats
+ from firecrawl.v2.methods.search import _validate_search_request
+
+
+ class TestSearchValidation:
+     """Unit tests for search request validation."""
+
+     def test_validate_empty_query(self):
+         """Test validation of empty query."""
+         request = SearchRequest(query="")
+         with pytest.raises(ValueError, match="Query cannot be empty"):
+             _validate_search_request(request)
+
+         request = SearchRequest(query=" ")
+         with pytest.raises(ValueError, match="Query cannot be empty"):
+             _validate_search_request(request)
+
+     def test_validate_invalid_limit(self):
+         """Test validation of invalid limits."""
+         # Zero limit
+         request = SearchRequest(query="test", limit=0)
+         with pytest.raises(ValueError, match="Limit must be positive"):
+             _validate_search_request(request)
+
+         # Negative limit
+         request = SearchRequest(query="test", limit=-1)
+         with pytest.raises(ValueError, match="Limit must be positive"):
+             _validate_search_request(request)
+
+         # Too high limit
+         request = SearchRequest(query="test", limit=101)
+         with pytest.raises(ValueError, match="Limit cannot exceed 100"):
+             _validate_search_request(request)
+
+     def test_validate_invalid_timeout(self):
+         """Test validation of invalid timeouts."""
+         # Zero timeout
+         request = SearchRequest(query="test", timeout=0)
+         with pytest.raises(ValueError, match="Timeout must be positive"):
+             _validate_search_request(request)
+
+         # Negative timeout
+         request = SearchRequest(query="test", timeout=-1000)
+         with pytest.raises(ValueError, match="Timeout must be positive"):
+             _validate_search_request(request)
+
+         # Too high timeout
+         request = SearchRequest(query="test", timeout=300001)
+         with pytest.raises(ValueError, match="Timeout cannot exceed 300000ms"):
+             _validate_search_request(request)
+
+     def test_validate_invalid_sources(self):
+         """Test validation of invalid sources."""
+         # Invalid string source
+         request = SearchRequest(query="test", sources=["invalid_source"])
+         with pytest.raises(ValueError, match="Invalid source type"):
+             _validate_search_request(request)
+
+         # Invalid object source
+         request = SearchRequest(query="test", sources=[Source(type="invalid_source")])
+         with pytest.raises(ValueError, match="Invalid source type"):
+             _validate_search_request(request)
+
+         # Mixed valid/invalid sources
+         request = SearchRequest(query="test", sources=["web", "invalid_source"])
+         with pytest.raises(ValueError, match="Invalid source type"):
+             _validate_search_request(request)
+
+     def test_validate_invalid_location(self):
+         """Test validation of invalid location."""
+         # Empty location
+         request = SearchRequest(query="test", location="")
+         with pytest.raises(ValueError, match="Location must be a non-empty string"):
+             _validate_search_request(request)
+
+         # Whitespace location
+         request = SearchRequest(query="test", location=" ")
+         with pytest.raises(ValueError, match="Location must be a non-empty string"):
+             _validate_search_request(request)
+
+     def test_validate_invalid_tbs(self):
+         """Test validation of invalid tbs values."""
+         invalid_tbs_values = ["invalid", "qdr:x", "yesterday", "last_week"]
+
+         for invalid_tbs in invalid_tbs_values:
+             request = SearchRequest(query="test", tbs=invalid_tbs)
+             with pytest.raises(ValueError, match="Invalid tbs value"):
+                 _validate_search_request(request)
+
+     def test_validate_custom_date_ranges(self):
+         """Test validation of custom date range formats."""
+         valid_custom_ranges = [
+             "cdr:1,cd_min:1/1/2024,cd_max:12/31/2024",
+             "cdr:1,cd_min:12/1/2024,cd_max:12/31/2024",
+             "cdr:1,cd_min:2/28/2023,cd_max:3/1/2023",
+             "cdr:1,cd_min:10/15/2023,cd_max:11/15/2023"
+         ]
+
+         for valid_range in valid_custom_ranges:
+             request = SearchRequest(query="test", tbs=valid_range)
+             validated = _validate_search_request(request)
+             assert validated == request
+
+     def test_validate_invalid_custom_date_ranges(self):
+         """Test validation of invalid custom date range formats."""
+         # Invalid custom date ranges
+         invalid_custom_ranges = [
+             "cdr:1,cd_min:2/28/2023",  # Missing cd_max
+             "cdr:1,cd_max:2/28/2023",  # Missing cd_min
+             "cdr:2,cd_min:1/1/2024,cd_max:12/31/2024",  # Wrong cdr value
+             "cdr:cd_min:1/1/2024,cd_max:12/31/2024",  # Missing :1
+             "custom:1,cd_min:1/1/2024,cd_max:12/31/2024"  # Wrong prefix
+         ]
+
+         for invalid_range in invalid_custom_ranges:
+             request = SearchRequest(query="test", tbs=invalid_range)
+             with pytest.raises(ValueError, match="Invalid"):
+                 _validate_search_request(request)
+
+     def test_validate_valid_requests(self):
+         """Test that valid requests pass validation."""
+         # Minimal valid request
+         request = SearchRequest(query="test")
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Request with all optional parameters
+         request = SearchRequest(
+             query="test query",
+             sources=["web", "news"],
+             limit=10,
+             tbs="qdr:w",
+             location="US",
+             ignore_invalid_urls=False,
+             timeout=30000
+         )
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Request with object sources
+         request = SearchRequest(
+             query="test",
+             sources=[Source(type="web"), Source(type="images")]
+         )
+         validated = _validate_search_request(request)
+         assert validated == request
+
+     def test_validate_edge_cases(self):
+         """Test edge cases and boundary values."""
+         # Maximum valid limit
+         request = SearchRequest(query="test", limit=100)
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Maximum valid timeout
+         request = SearchRequest(query="test", timeout=300000)
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Minimum valid limit
+         request = SearchRequest(query="test", limit=1)
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Minimum valid timeout
+         request = SearchRequest(query="test", timeout=1)
+         validated = _validate_search_request(request)
+         assert validated == request
+
+     def test_validate_none_values(self):
+         """Test that None values for optional fields are handled correctly."""
+         request = SearchRequest(
+             query="test",
+             sources=None,
+             limit=None,
+             tbs=None,
+             location=None,
+             ignore_invalid_urls=None,
+             timeout=None
+         )
+         validated = _validate_search_request(request)
+         assert validated == request
+
+     def test_validate_scrape_options_integration(self):
+         """Test that scrape_options validation is integrated."""
+         # Test with valid scrape options
+         scrape_opts = ScrapeOptions(formats=["markdown"], timeout=30000)
+         request = SearchRequest(query="test", scrape_options=scrape_opts)
+         validated = _validate_search_request(request)
+         assert validated == request
+
+         # Test with invalid scrape options (should raise error)
+         invalid_scrape_opts = ScrapeOptions(timeout=-1000)
+         request = SearchRequest(query="test", scrape_options=invalid_scrape_opts)
+         with pytest.raises(ValueError, match="Timeout must be positive"):
+             _validate_search_request(request)
+
+
+
+
+
+ class TestSearchRequestModel:
+     """Unit tests for SearchRequest model behavior."""
+
+     def test_default_values(self):
+         """Test that default values are set correctly."""
+         request = SearchRequest(query="test")
+         assert request.limit == 5
+         assert request.ignore_invalid_urls is None  # No default in model
+         assert request.timeout == 300000
+         assert request.sources is None
+         assert request.tbs is None
+         assert request.location is None
+         assert request.scrape_options is None
+
+     def test_field_aliases(self):
+         """Test that field aliases work correctly for API serialization."""
+         # Test with None value (no default)
+         request1 = SearchRequest(query="test")
+         data1 = request1.model_dump(by_alias=True)
+         assert "ignore_invalid_urls" in data1  # No alias, uses snake_case
+         assert data1["ignore_invalid_urls"] is None
+
+         # Test with explicit False value
+         request2 = SearchRequest(
+             query="test",
+             ignore_invalid_urls=False,
+             scrape_options=ScrapeOptions(formats=["markdown"])
+         )
+
+         # Check that aliases are used in model_dump with by_alias=True
+         data2 = request2.model_dump(by_alias=True)
+         assert "ignore_invalid_urls" in data2  # No alias, uses snake_case
+         assert "scrape_options" in data2  # No alias, uses snake_case
+         assert data2["ignore_invalid_urls"] is False
firecrawl/__tests__/unit/v2/methods/test_usage_types.py
@@ -0,0 +1,18 @@
+ from firecrawl.v2.types import ConcurrencyCheck, CreditUsage, TokenUsage
+
+
+ class TestUsageTypes:
+     def test_concurrency_check_model(self):
+         cc = ConcurrencyCheck(concurrency=3, max_concurrency=10)
+         assert cc.concurrency == 3
+         assert cc.max_concurrency == 10
+
+     def test_credit_usage_model(self):
+         cu = CreditUsage(remaining_credits=123)
+         assert isinstance(cu.remaining_credits, int)
+         assert cu.remaining_credits == 123
+
+     def test_token_usage_model(self):
+         tu = TokenUsage(remaining_tokens=10)
+         assert tu.remaining_tokens == 10
+
firecrawl/__tests__/unit/v2/methods/test_webhook.py
@@ -0,0 +1,123 @@
+ """
+ Unit tests for webhook functionality in Firecrawl v2 SDK.
+ """
+
+ import pytest
+ from firecrawl.v2.types import WebhookConfig, CrawlRequest
+ from firecrawl.v2.methods.crawl import _prepare_crawl_request
+
+ class TestWebhookConfig:
+     """Test WebhookConfig class functionality."""
+
+     def test_webhook_config_creation_minimal(self):
+         """Test creating WebhookConfig with minimal parameters."""
+         webhook = WebhookConfig(url="https://example.com/webhook")
+         assert webhook.url == "https://example.com/webhook"
+         assert webhook.headers is None
+         assert webhook.metadata is None
+         assert webhook.events is None
+
+     def test_webhook_config_creation_full(self):
+         """Test creating WebhookConfig with all parameters."""
+         webhook = WebhookConfig(
+             url="https://example.com/webhook",
+             headers={"Authorization": "Bearer token"},
+             metadata={"project": "test"},
+             events=["completed", "failed"]
+         )
+         assert webhook.url == "https://example.com/webhook"
+         assert webhook.headers == {"Authorization": "Bearer token"}
+         assert webhook.metadata == {"project": "test"}
+         assert webhook.events == ["completed", "failed"]
+
+     def test_webhook_config_validation(self):
+         """Test WebhookConfig validation."""
+         # URL is required
+         with pytest.raises(Exception):  # Pydantic validation error
+             WebhookConfig()
+
+
+ class TestCrawlRequestWebhook:
+     """Test CrawlRequest webhook functionality."""
+
+     def test_crawl_request_with_string_webhook(self):
+         """Test CrawlRequest with string webhook."""
+         request = CrawlRequest(
+             url="https://example.com",
+             webhook="https://example.com/webhook"
+         )
+
+         data = _prepare_crawl_request(request)
+         assert data["webhook"] == "https://example.com/webhook"
+
+     def test_crawl_request_with_webhook_config(self):
+         """Test CrawlRequest with WebhookConfig object."""
+         webhook_config = WebhookConfig(
+             url="https://example.com/webhook",
+             headers={"Authorization": "Bearer token"},
+             events=["completed"]
+         )
+
+         request = CrawlRequest(
+             url="https://example.com",
+             webhook=webhook_config
+         )
+
+         data = _prepare_crawl_request(request)
+         assert data["webhook"]["url"] == "https://example.com/webhook"
+         assert data["webhook"]["headers"] == {"Authorization": "Bearer token"}
+         assert data["webhook"]["events"] == ["completed"]
+
+     def test_crawl_request_without_webhook(self):
+         """Test CrawlRequest without webhook."""
+         request = CrawlRequest(url="https://example.com")
+
+         data = _prepare_crawl_request(request)
+         assert "webhook" not in data
+
+     def test_crawl_request_webhook_serialization(self):
+         """Test that webhook config is properly serialized."""
+         webhook_config = WebhookConfig(
+             url="https://example.com/webhook",
+             headers={"Content-Type": "application/json"},
+             metadata={"test": "value"},
+             events=["page", "completed"]
+         )
+
+         request = CrawlRequest(
+             url="https://example.com",
+             webhook=webhook_config
+         )
+
+         data = _prepare_crawl_request(request)
+         webhook_data = data["webhook"]
+
+         # Check that all fields are properly serialized
+         assert webhook_data["url"] == "https://example.com/webhook"
+         assert webhook_data["headers"] == {"Content-Type": "application/json"}
+         assert webhook_data["metadata"] == {"test": "value"}
+         assert webhook_data["events"] == ["page", "completed"]
+
+     def test_crawl_request_webhook_with_none_values(self):
+         """Test webhook config with None values are excluded from serialization."""
+         webhook_config = WebhookConfig(
+             url="https://example.com/webhook",
+             headers=None,
+             metadata=None,
+             events=None
+         )
+
+         request = CrawlRequest(
+             url="https://example.com",
+             webhook=webhook_config
+         )
+
+         data = _prepare_crawl_request(request)
+         webhook_data = data["webhook"]
+
+         # Only url should be present, None values should be excluded
+         assert webhook_data["url"] == "https://example.com/webhook"
+         assert "headers" not in webhook_data
+         assert "metadata" not in webhook_data
+         assert "events" not in webhook_data
+
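
Note (editor's sketch, not part of the packaged diff): per the tests above, a crawl webhook can be passed either as a bare URL string or as a WebhookConfig, and _prepare_crawl_request serializes the config as a nested object with unset (None) fields dropped. A minimal sketch with illustrative values:

    from firecrawl.v2.types import CrawlRequest, WebhookConfig
    from firecrawl.v2.methods.crawl import _prepare_crawl_request

    webhook = WebhookConfig(
        url="https://example.com/webhook",
        events=["page", "completed"],
    )
    request = CrawlRequest(url="https://example.com", webhook=webhook)
    payload = _prepare_crawl_request(request)

    assert payload["webhook"]["url"] == "https://example.com/webhook"
    assert payload["webhook"]["events"] == ["page", "completed"]
    assert "headers" not in payload["webhook"]   # unset fields are excluded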