firecrawl 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic.

firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "3.1.0"
+__version__ = "3.2.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -84,4 +84,4 @@ __all__ = [
     'V1JsonConfig',
     'V1ScrapeOptions',
     'V1ChangeTrackingOptions',
-]
+]
@@ -2,8 +2,19 @@ import os
 import pytest
 from dotenv import load_dotenv
 from firecrawl import AsyncFirecrawl
-from firecrawl.v2.types import ScrapeOptions, ScrapeFormats, SearchData, SearchResult, Document
+from firecrawl.types import (
+    SearchData,
+    Document,
+    ScrapeOptions,
+    ScrapeFormats,
+    SearchResultWeb,
+    SearchResultNews,
+    SearchResultImages,
+)
 
+load_dotenv()
+
+firecrawl = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
 
 def _collect_texts(entries):
     texts = []
@@ -36,148 +47,202 @@ def _is_document(entry) -> bool:
         hasattr(entry, 'change_tracking') or \
         hasattr(entry, 'summary')
 
+@pytest.mark.asyncio
+async def test_async_search_minimal_request():
+    results = await firecrawl.search(
+        query="What is the capital of France?"
+    )
+    assert isinstance(results, SearchData)
+    assert hasattr(results, 'web')
+    assert results.web is not None
+    assert len(results.web) > 0
+    assert hasattr(results, 'news')
+    assert results.news is None
+    assert hasattr(results, 'images')
+    assert results.images is None
+
+    for result in results.web:
+        assert isinstance(result, SearchResultWeb)
+        assert hasattr(result, 'url')
+        assert hasattr(result, 'title')
+        assert hasattr(result, 'description')
+        assert result.url.startswith('http')
+        assert result.title is not None
+        assert result.description is not None
+
+    all_text = ' '.join(_collect_texts(results.web))
+    assert 'paris' in all_text
+
+    assert results.news is None
+    assert results.images is None
 
-load_dotenv()
-
-if not os.getenv("API_KEY"):
-    raise ValueError("API_KEY is not set")
-
-if not os.getenv("API_URL"):
-    raise ValueError("API_URL is not set")
+@pytest.mark.asyncio
+async def test_async_search_with_sources():
+    results = await firecrawl.search(
+        query="firecrawl",
+        sources=["web", "news", "images"],
+        limit=3
+    )
+    assert isinstance(results, SearchData)
+    assert results.web is not None
+    assert len(results.web) <= 3
+    assert isinstance(results.web[0], SearchResultWeb)
 
+    if results.news is not None:
+        assert len(results.news) <= 3
+        assert isinstance(results.news[0], SearchResultNews)
 
-@pytest.mark.asyncio
-async def test_async_search_minimal():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    data = await client.search("What is the capital of France?")
-    # Assert sections like sync tests
-    assert hasattr(data, "web")
-    assert hasattr(data, "news")
-    assert hasattr(data, "images")
-    assert data.web is not None
-    assert len(data.web) > 0
-    titles = [getattr(r, "title", None) for r in data.web]
-    descs = [getattr(r, "description", None) for r in data.web]
-    all_text = " ".join([t.lower() for t in titles if t] + [d.lower() for d in descs if d])
-    assert "paris" in all_text
-    assert data.news is None
-    assert data.images is None
+    if results.images is not None:
+        assert len(results.images) <= 3
+        assert isinstance(results.images[0], SearchResultImages)
 
+    web_titles = [result.title.lower() for result in results.web]
+    web_descriptions = [result.description.lower() for result in results.web]
+    all_web_text = ' '.join(web_titles + web_descriptions)
+    assert 'firecrawl' in all_web_text
 
 @pytest.mark.asyncio
-async def test_async_search_with_sources_and_limit():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    data = await client.search("firecrawl", sources=["web", "news"], limit=3)
-    # Sections present
-    assert hasattr(data, "web") and hasattr(data, "news") and hasattr(data, "images")
-    # Web present, images absent, news optional but if present respects limit
-    if data.web is not None:
-        assert len(data.web) <= 3
-    if data.news is not None:
-        assert len(data.news) <= 3
-    assert data.images is None
-
+async def test_async_search_result_structure():
+    results = await firecrawl.search(
+        query="test query",
+        limit=1
+    )
+    if results.web and len(results.web) > 0:
+        result = results.web[0]
+        assert hasattr(result, 'url')
+        assert hasattr(result, 'title')
+        assert hasattr(result, 'description')
+        assert isinstance(result.url, str)
+        assert isinstance(result.title, str) or result.title is None
+        assert isinstance(result.description, str) or result.description is None
+        assert result.url.startswith('http')
 
 @pytest.mark.asyncio
-async def test_async_search_with_all_params():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    data = await client.search(
-        "artificial intelligence",
-        sources=["web", "news"],
+async def test_async_search_all_parameters():
+    from firecrawl.types import ScrapeOptions, Location, WaitAction
+    schema = {
+        "type": "object",
+        "properties": {
+            "title": {"type": "string"},
+            "description": {"type": "string"},
+            "url": {"type": "string"}
+        },
+        "required": ["title", "description"]
+    }
+    results = await firecrawl.search(
+        query="artificial intelligence",
+        sources=[
+            {"type": "web"},
+            {"type": "news"}
+        ],
         limit=3,
-        tbs="qdr:w",
+        tbs="qdr:m",
         location="US",
-        ignore_invalid_urls=False,
-        timeout=30000,
-        scrape_options={
-            "formats": ["markdown"],
-            "headers": {"User-Agent": "E2E-AIO"},
-            "include_tags": ["h1"],
-            "exclude_tags": ["nav"],
-            "only_main_content": False,
-            "timeout": 15000,
-            "wait_for": 2000,
-            "mobile": True,
-            "skip_tls_verification": True,
-            "remove_base64_images": False,
-        },
+        ignore_invalid_urls=True,
+        timeout=60000,
+        scrape_options=ScrapeOptions(
+            formats=[
+                "markdown",
+                "html",
+                {
+                    "type": "json",
+                    "prompt": "Extract the title and description from the page",
+                    "schema": schema
+                },
+                {"type": "summary"}
+            ],
+            headers={"User-Agent": "Firecrawl-Test/1.0"},
+            include_tags=["h1", "h2", "p"],
+            exclude_tags=["nav", "footer"],
+            only_main_content=True,
+            wait_for=2000,
+            mobile=False,
+            skip_tls_verification=False,
+            remove_base64_images=True,
+            block_ads=True,
+            proxy="basic",
+            max_age=3600000,
+            store_in_cache=True,
+            location=Location(
+                country="US",
+                languages=["en"]
+            ),
+            actions=[
+                WaitAction(milliseconds=1000)
+            ]
+        )
     )
-    # Structure and type assertions mirroring sync
-    assert isinstance(data, SearchData)
-    assert hasattr(data, "web") and hasattr(data, "news") and hasattr(data, "images")
-    assert data.web is not None
-    assert len(data.web) <= 3
-    non_doc = [r for r in (data.web or []) if not _is_document(r)]
-    if non_doc:
-        combined = " ".join(_collect_texts(non_doc))
-        ai_terms = ["artificial", "intelligence", "ai", "machine", "learning"]
-        assert any(term in combined for term in ai_terms)
-    for r in data.web:
-        assert isinstance(r, (SearchResult, Document))
-        if isinstance(r, Document):
-            assert (r.markdown is not None) or (r.html is not None)
+    assert isinstance(results, SearchData)
+    assert hasattr(results, 'web')
+    assert hasattr(results, 'news')
+    assert hasattr(results, 'images')
+    assert results.web is not None
+    assert len(results.web) <= 3
+
+    non_doc_entries = [r for r in (results.web or []) if not _is_document(r)]
+    if non_doc_entries:
+        all_web_text = ' '.join(_collect_texts(non_doc_entries))
+        ai_terms = ['artificial', 'intelligence', 'ai', 'machine', 'learning']
+        assert any(term in all_web_text for term in ai_terms)
+
+    for result in results.web:
+        assert isinstance(result, (SearchResultWeb, Document))
+        if isinstance(result, Document):
+            assert (result.markdown is not None) or (result.html is not None)
         else:
-            assert hasattr(r, "url")
-            assert isinstance(r.url, str) and r.url.startswith("http")
-    if data.news is not None:
-        assert len(data.news) <= 10
-        for r in data.news:
-            assert isinstance(r, (SearchResult, Document))
-            if isinstance(r, Document):
-                assert (r.markdown is not None) or (r.html is not None)
+            assert hasattr(result, 'url')
+            assert isinstance(result.url, str) and result.url.startswith('http')
+
+    if results.news is not None:
+        assert len(results.news) <= 3
+        for result in results.news:
+            assert isinstance(result, (SearchResultNews, Document))
+            if isinstance(result, Document):
+                assert (result.markdown is not None) or (result.html is not None)
             else:
-                assert isinstance(r.url, str) and r.url.startswith("http")
-    assert data.images is None
-
-
-@pytest.mark.asyncio
-async def test_async_search_minimal_content_check():
-    """Stronger assertion similar to sync: content check on a known query."""
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    data = await client.search("What is the capital of France?")
-    assert hasattr(data, "web") and data.web is not None
-    non_doc = [r for r in (data.web or []) if not _is_document(r)]
-    if non_doc:
-        combined = " ".join(_collect_texts(non_doc))
-        assert "paris" in combined
-
-
-@pytest.mark.asyncio
-async def test_async_search_result_structure():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    data = await client.search("test query", limit=1)
-    if data.web and len(data.web) > 0:
-        result = data.web[0]
-        assert hasattr(result, "url")
-        assert hasattr(result, "title")
-        assert hasattr(result, "description")
-        assert isinstance(result.url, str) and result.url.startswith("http")
-        assert isinstance(getattr(result, "title", None), (str, type(None)))
-        assert isinstance(getattr(result, "description", None), (str, type(None)))
+                assert hasattr(result, 'url')
+                assert isinstance(result.url, str) and result.url.startswith('http')
 
+    assert results.images is None
 
 @pytest.mark.asyncio
 async def test_async_search_formats_flexibility():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    # list string
-    res1 = await client.search("python programming", limit=1, scrape_options=ScrapeOptions(formats=["markdown"]))
-    # list objects
-    res2 = await client.search("python programming", limit=1, scrape_options=ScrapeOptions(formats=[{"type": "markdown"}]))
-    # ScrapeFormats object
-    res3 = await client.search("python programming", limit=1, scrape_options=ScrapeOptions(formats=ScrapeFormats(markdown=True)))
-    assert isinstance(res1, SearchData) and hasattr(res1, "web")
-    assert isinstance(res2, SearchData) and hasattr(res2, "web")
-    assert isinstance(res3, SearchData) and hasattr(res3, "web")
-
+    # Test with list format
+    results1 = await firecrawl.search(
+        query="python programming",
+        limit=1,
+        scrape_options=ScrapeOptions(
+            formats=["markdown"]
+        )
+    )
+    # Test with ScrapeFormats object
+    results2 = await firecrawl.search(
+        query="python programming",
+        limit=1,
+        scrape_options=ScrapeOptions(
+            formats=ScrapeFormats(markdown=True)
+        )
+    )
+    assert isinstance(results1, SearchData)
+    assert isinstance(results2, SearchData)
+    assert results1.web is not None
+    assert results2.web is not None
 
 @pytest.mark.asyncio
-async def test_async_search_json_format_object():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-    data = await client.search(
-        "site:docs.firecrawl.dev",
+async def test_async_search_with_json_format_object():
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "title": {"type": "string"}
+        },
+        "required": ["title"],
+    }
+    results = await firecrawl.search(
+        query="site:docs.firecrawl.dev",
         limit=1,
-        scrape_options={"formats": [{"type": "json", "prompt": "Extract page title", "schema": json_schema}]},
+        scrape_options=ScrapeOptions(
+            formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}]
+        ),
     )
-    assert hasattr(data, "web")
-
+    assert isinstance(results, SearchData)
+    assert results.web is not None and len(results.web) >= 0
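For reference, a minimal usage sketch distilled from the updated async e2e tests above; it assumes API_KEY and API_URL are set in the environment, exactly as the test module does.

import asyncio
import os

from dotenv import load_dotenv
from firecrawl import AsyncFirecrawl

load_dotenv()

async def main():
    # Same construction as the tests: credentials come from the environment.
    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
    # Results come back grouped by source; groups that were not requested stay None.
    results = await client.search(query="firecrawl", sources=["web", "news"], limit=3)
    for item in results.web or []:
        print(item.url, item.title)

asyncio.run(main())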
@@ -1,7 +1,7 @@
 from firecrawl import Firecrawl
 import os
 from dotenv import load_dotenv
-from firecrawl.types import SearchData, SearchResult, Document, ScrapeFormats, ScrapeOptions
+from firecrawl.types import SearchData, Document, ScrapeOptions, SearchResultWeb, SearchResultNews, SearchResultImages
 
 load_dotenv()
 
@@ -53,7 +53,7 @@ def test_search_minimal_request():
     assert results.images is None
 
     for result in results.web:
-        assert isinstance(result, SearchResult)
+        assert isinstance(result, SearchResultWeb)
         assert hasattr(result, 'url')
         assert hasattr(result, 'title')
         assert hasattr(result, 'description')
@@ -73,7 +73,7 @@ def test_search_with_sources():
     """Test search with specific sources."""
     results = firecrawl.search(
         query="firecrawl",
-        sources=["web", "news"],
+        sources=["web", "news", "images"],
         limit=3
     )
 
@@ -81,11 +81,15 @@ def test_search_with_sources():
 
     assert results.web is not None
     assert len(results.web) <= 3
+    assert isinstance(results.web[0], SearchResultWeb)
 
     if results.news is not None:
         assert len(results.news) <= 3
+        assert isinstance(results.news[0], SearchResultNews)
 
-    assert results.images is None
+    if results.images is not None:
+        assert len(results.images) <= 3
+        assert isinstance(results.images[0], SearchResultImages)
 
     web_titles = [result.title.lower() for result in results.web]
     web_descriptions = [result.description.lower() for result in results.web]
@@ -193,7 +197,7 @@ def test_search_all_parameters():
 
     # Test that each result has proper structure
     for result in results.web:
-        assert isinstance(result, (SearchResult, Document))
+        assert isinstance(result, (SearchResultWeb, Document))
         if isinstance(result, Document):
             # Document path: ensure content present
             assert (result.markdown is not None) or (result.html is not None)
@@ -206,7 +210,7 @@ def test_search_all_parameters():
     if results.news is not None:
         assert len(results.news) <= 3
         for result in results.news:
-            assert isinstance(result, (SearchResult, Document))
+            assert isinstance(result, (SearchResultNews, Document))
             if isinstance(result, Document):
                 assert (result.markdown is not None) or (result.html is not None)
             else:
@@ -11,7 +11,7 @@ class TestSearchValidation:
         request = SearchRequest(query="")
         with pytest.raises(ValueError, match="Query cannot be empty"):
             _validate_search_request(request)
-
+
         request = SearchRequest(query=" ")
         with pytest.raises(ValueError, match="Query cannot be empty"):
             _validate_search_request(request)
@@ -22,12 +22,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=0)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Negative limit
         request = SearchRequest(query="test", limit=-1)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Too high limit
         request = SearchRequest(query="test", limit=101)
         with pytest.raises(ValueError, match="Limit cannot exceed 100"):
@@ -39,12 +39,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", timeout=0)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Negative timeout
         request = SearchRequest(query="test", timeout=-1000)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Too high timeout
         request = SearchRequest(query="test", timeout=300001)
         with pytest.raises(ValueError, match="Timeout cannot exceed 300000ms"):
@@ -56,12 +56,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", sources=["invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Invalid object source
         request = SearchRequest(query="test", sources=[Source(type="invalid_source")])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Mixed valid/invalid sources
         request = SearchRequest(query="test", sources=["web", "invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
@@ -73,7 +73,7 @@ class TestSearchValidation:
         request = SearchRequest(query="test", location="")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
             _validate_search_request(request)
-
+
         # Whitespace location
         request = SearchRequest(query="test", location=" ")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
@@ -82,19 +82,49 @@ class TestSearchValidation:
     def test_validate_invalid_tbs(self):
         """Test validation of invalid tbs values."""
         invalid_tbs_values = ["invalid", "qdr:x", "yesterday", "last_week"]
-
+
         for invalid_tbs in invalid_tbs_values:
             request = SearchRequest(query="test", tbs=invalid_tbs)
             with pytest.raises(ValueError, match="Invalid tbs value"):
                 _validate_search_request(request)
 
+    def test_validate_custom_date_ranges(self):
+        """Test validation of custom date range formats."""
+        valid_custom_ranges = [
+            "cdr:1,cd_min:1/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:12/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:2/28/2023,cd_max:3/1/2023",
+            "cdr:1,cd_min:10/15/2023,cd_max:11/15/2023"
+        ]
+
+        for valid_range in valid_custom_ranges:
+            request = SearchRequest(query="test", tbs=valid_range)
+            validated = _validate_search_request(request)
+            assert validated == request
+
+    def test_validate_invalid_custom_date_ranges(self):
+        """Test validation of invalid custom date range formats."""
+        # Invalid custom date ranges
+        invalid_custom_ranges = [
+            "cdr:1,cd_min:2/28/2023",  # Missing cd_max
+            "cdr:1,cd_max:2/28/2023",  # Missing cd_min
+            "cdr:2,cd_min:1/1/2024,cd_max:12/31/2024",  # Wrong cdr value
+            "cdr:cd_min:1/1/2024,cd_max:12/31/2024",  # Missing :1
+            "custom:1,cd_min:1/1/2024,cd_max:12/31/2024"  # Wrong prefix
+        ]
+
+        for invalid_range in invalid_custom_ranges:
+            request = SearchRequest(query="test", tbs=invalid_range)
+            with pytest.raises(ValueError, match="Invalid"):
+                _validate_search_request(request)
+
     def test_validate_valid_requests(self):
         """Test that valid requests pass validation."""
         # Minimal valid request
         request = SearchRequest(query="test")
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with all optional parameters
         request = SearchRequest(
             query="test query",
@@ -107,7 +137,7 @@ class TestSearchValidation:
         )
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with object sources
         request = SearchRequest(
             query="test",
@@ -122,17 +152,17 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=100)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Maximum valid timeout
         request = SearchRequest(query="test", timeout=300000)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid limit
         request = SearchRequest(query="test", limit=1)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid timeout
         request = SearchRequest(query="test", timeout=1)
         validated = _validate_search_request(request)
@@ -191,16 +221,16 @@ class TestSearchRequestModel:
         data1 = request1.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data1  # No alias, uses snake_case
         assert data1["ignore_invalid_urls"] is None
-
+
         # Test with explicit False value
         request2 = SearchRequest(
             query="test",
             ignore_invalid_urls=False,
             scrape_options=ScrapeOptions(formats=["markdown"])
         )
-
+
         # Check that aliases are used in model_dump with by_alias=True
         data2 = request2.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data2  # No alias, uses snake_case
         assert "scrape_options" in data2  # No alias, uses snake_case
-        assert data2["ignore_invalid_urls"] is False
+        assert data2["ignore_invalid_urls"] is False
firecrawl/types.py CHANGED
@@ -48,7 +48,9 @@ from .v2.types import (
     JsonFormat,
     FormatOption,
     SearchRequest,
-    SearchResult,
+    SearchResultWeb,
+    SearchResultNews,
+    SearchResultImages,
     SearchData,
     SearchResponse,
 
@@ -124,7 +126,9 @@ __all__ = [
     'JsonFormat',
     'FormatOption',
     'SearchRequest',
-    'SearchResult',
+    'SearchResultWeb',
+    'SearchResultNews',
+    'SearchResultImages',
     'SearchData',
     'SearchResponse',
 
@@ -1,55 +1,172 @@
-from typing import Dict, Any
-from ...types import SearchRequest, SearchData, SearchResult, Document
-from ...utils.normalize import normalize_document_input
+import re
+from typing import Dict, Any, Union, List, TypeVar, Type
+from ...types import (
+    SearchRequest,
+    SearchData,
+    Document,
+    SearchResultWeb,
+    SearchResultNews,
+    SearchResultImages,
+)
 from ...utils.http_client_async import AsyncHttpClient
 from ...utils.error_handler import handle_response_error
-from ...utils.validation import prepare_scrape_options, validate_scrape_options
+from ...utils.validation import validate_scrape_options, prepare_scrape_options
 
+T = TypeVar("T")
+
+async def search(
+    client: AsyncHttpClient,
+    request: SearchRequest
+) -> SearchData:
+    """
+    Async search for documents.
+
+    Args:
+        client: Async HTTP client instance
+        request: Search request
+
+    Returns:
+        SearchData with search results grouped by source type
+
+    Raises:
+        FirecrawlError: If the search operation fails
+    """
+    request_data = _prepare_search_request(request)
+    try:
+        response = await client.post("/v2/search", request_data)
+        if response.status_code != 200:
+            handle_response_error(response, "search")
+        response_data = response.json()
+        if not response_data.get("success"):
+            handle_response_error(response, "search")
+        data = response_data.get("data", {}) or {}
+        out = SearchData()
+        if "web" in data:
+            out.web = _transform_array(data["web"], SearchResultWeb)
+        if "news" in data:
+            out.news = _transform_array(data["news"], SearchResultNews)
+        if "images" in data:
+            out.images = _transform_array(data["images"], SearchResultImages)
+        return out
+    except Exception as err:
+        if hasattr(err, "response"):
+            handle_response_error(getattr(err, "response"), "search")
+        raise err
+
+def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, Document]]:
+    """
+    Transforms an array of items into a list of result_type or Document.
+    If the item dict contains any of the special keys, it is treated as a Document.
+    Otherwise, it is treated as result_type.
+    If the item is not a dict, it is wrapped as result_type with url=item.
+    """
+    results: List[Union[T, Document]] = []
+    for item in arr:
+        if item and isinstance(item, dict):
+            if (
+                "markdown" in item or
+                "html" in item or
+                "rawHtml" in item or
+                "links" in item or
+                "screenshot" in item or
+                "changeTracking" in item or
+                "summary" in item or
+                "json" in item
+            ):
+                results.append(Document(**item))
+            else:
+                results.append(result_type(**item))
+        else:
+            results.append(result_type(url=item))
+    return results
+
+def _validate_search_request(request: SearchRequest) -> SearchRequest:
+    """
+    Validate and normalize search request.
+
+    Args:
+        request: Search request to validate
+
+    Returns:
+        Validated request
+
+    Raises:
+        ValueError: If request is invalid
+    """
+    if not request.query or not request.query.strip():
+        raise ValueError("Query cannot be empty")
+
+    if request.limit is not None:
+        if request.limit <= 0:
+            raise ValueError("Limit must be positive")
+        if request.limit > 100:
+            raise ValueError("Limit cannot exceed 100")
+
+    if request.timeout is not None:
+        if request.timeout <= 0:
+            raise ValueError("Timeout must be positive")
+        if request.timeout > 300000:
+            raise ValueError("Timeout cannot exceed 300000ms (5 minutes)")
+
+    if request.sources is not None:
+        valid_sources = {"web", "news", "images"}
+        for source in request.sources:
+            if isinstance(source, str):
+                if source not in valid_sources:
+                    raise ValueError(f"Invalid source type: {source}. Valid types: {valid_sources}")
+            elif hasattr(source, 'type'):
+                if source.type not in valid_sources:
+                    raise ValueError(f"Invalid source type: {source.type}. Valid types: {valid_sources}")
+
+    if request.location is not None:
+        if not isinstance(request.location, str) or len(request.location.strip()) == 0:
+            raise ValueError("Location must be a non-empty string")
+
+    if request.tbs is not None:
+        valid_tbs_values = {
+            "qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y",
+            "d", "w", "m", "y"
+        }
+        if request.tbs in valid_tbs_values:
+            pass
+        elif request.tbs.startswith("cdr:"):
+            custom_date_pattern = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
+            if not re.match(custom_date_pattern, request.tbs):
+                raise ValueError(f"Invalid custom date range format: {request.tbs}. Expected format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
+        else:
+            raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values} or custom date range format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
 
-def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
-    data = request.model_dump(exclude_none=True)
-    if request.ignore_invalid_urls is not None:
-        data["ignoreInvalidURLs"] = request.ignore_invalid_urls
-        data.pop("ignore_invalid_urls", None)
     if request.scrape_options is not None:
         validate_scrape_options(request.scrape_options)
-        scrape_data = prepare_scrape_options(request.scrape_options)
+
+    return request
+
+def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
+    """
+    Prepare a search request payload.
+
+    Args:
+        request: Search request
+
+    Returns:
+        Request payload dictionary
+    """
+    validated_request = _validate_search_request(request)
+    data = validated_request.model_dump(exclude_none=True, by_alias=True)
+
+    if "limit" not in data and validated_request.limit is not None:
+        data["limit"] = validated_request.limit
+    if "timeout" not in data and validated_request.timeout is not None:
+        data["timeout"] = validated_request.timeout
+
+    if validated_request.ignore_invalid_urls is not None:
+        data["ignoreInvalidURLs"] = validated_request.ignore_invalid_urls
+        data.pop("ignore_invalid_urls", None)
+
+    if validated_request.scrape_options is not None:
+        scrape_data = prepare_scrape_options(validated_request.scrape_options)
         if scrape_data:
             data["scrapeOptions"] = scrape_data
         data.pop("scrape_options", None)
-    return data
-
-
-async def search(client: AsyncHttpClient, request: SearchRequest) -> SearchData:
-    payload = _prepare_search_request(request)
-    response = await client.post("/v2/search", payload)
-    if response.status_code >= 400:
-        handle_response_error(response, "search")
-    body = response.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error occurred"))
-
-    data = body.get("data", {})
-    search_data = SearchData()
-    for source_type, source_documents in data.items():
-        if isinstance(source_documents, list):
-            results = []
-            for doc_data in source_documents:
-                if isinstance(doc_data, dict):
-                    if request.scrape_options is not None and any(
-                        key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
-                    ):
-                        normalized = normalize_document_input(doc_data)
-                        results.append(Document(**normalized))
-                    else:
-                        results.append(SearchResult(
-                            url=doc_data.get('url', ''),
-                            title=doc_data.get('title'),
-                            description=doc_data.get('description')
-                        ))
-                elif isinstance(doc_data, str):
-                    results.append(SearchResult(url=doc_data))
-            if hasattr(search_data, source_type):
-                setattr(search_data, source_type, results)
-    return search_data
 
+    return data
@@ -2,11 +2,13 @@
 Search functionality for Firecrawl v2 API.
 """
 
-from typing import Optional, Dict, Any, Union
-from ..types import SearchRequest, SearchData, SearchResult, Document
+import re
+from typing import Dict, Any, Union, List, TypeVar, Type
+from ..types import SearchRequest, SearchData, Document, SearchResultWeb, SearchResultNews, SearchResultImages
 from ..utils.normalize import normalize_document_input
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
 
+T = TypeVar("T")
 
 def search(
     client: HttpClient,
@@ -26,48 +28,56 @@ def search(
         FirecrawlError: If the search operation fails
     """
    request_data = _prepare_search_request(request)
-
-    response = client.post("/v2/search", request_data)
-
-    if not response.ok:
-        handle_response_error(response, "search")
-
-    response_data = response.json()
-
-    if not response_data.get("success"):
-        # Handle error case
-        error_msg = response_data.get("error", "Unknown error occurred")
-        raise Exception(f"Search failed: {error_msg}")
-
-    data = response_data.get("data", {})
-    search_data = SearchData()
-
-    for source_type, source_documents in data.items():
-        if isinstance(source_documents, list):
-            results = []
-            for doc_data in source_documents:
-                if isinstance(doc_data, dict):
-                    # If page scraping options were provided, API returns full Document objects
-                    if request.scrape_options is not None and any(
-                        key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
-                    ):
-                        normalized = normalize_document_input(doc_data)
-                        results.append(Document(**normalized))
-                    else:
-                        # Minimal search result shape
-                        results.append(SearchResult(
-                            url=doc_data.get('url', ''),
-                            title=doc_data.get('title'),
-                            description=doc_data.get('description')
-                        ))
-                elif isinstance(doc_data, str):
-                    results.append(SearchResult(url=doc_data))
-
-            if hasattr(search_data, source_type):
-                setattr(search_data, source_type, results)
-
-    return search_data
+    try:
+        response = client.post("/v2/search", request_data)
+        if response.status_code != 200:
+            handle_response_error(response, "search")
+        response_data = response.json()
+        if not response_data.get("success"):
+            handle_response_error(response, "search")
+        data = response_data.get("data", {}) or {}
+        out = SearchData()
+        if "web" in data:
+            out.web = _transform_array(data["web"], SearchResultWeb)
+        if "news" in data:
+            out.news = _transform_array(data["news"], SearchResultNews)
+        if "images" in data:
+            out.images = _transform_array(data["images"], SearchResultImages)
+        return out
+    except Exception as err:
+        # If the error is an HTTP error from requests, handle it
+        # (simulate isAxiosError by checking for requests' HTTPError or Response)
+        if hasattr(err, "response"):
+            handle_response_error(getattr(err, "response"), "search")
+        raise err
 
+def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, 'Document']]:
+    """
+    Transforms an array of items into a list of result_type or Document.
+    If the item dict contains any of the special keys, it is treated as a Document.
+    Otherwise, it is treated as result_type.
+    If the item is not a dict, it is wrapped as result_type with url=item.
+    """
+    results: List[Union[T, 'Document']] = []
+    for item in arr:
+        if item and isinstance(item, dict):
+            if (
+                "markdown" in item or
+                "html" in item or
+                "rawHtml" in item or
+                "links" in item or
+                "screenshot" in item or
+                "changeTracking" in item or
+                "summary" in item or
+                "json" in item
+            ):
+                results.append(Document(**item))
+            else:
+                results.append(result_type(**item))
+        else:
+            # For non-dict items, assume it's a URL and wrap in result_type
+            results.append(result_type(url=item))
+    return results
 
 def _validate_search_request(request: SearchRequest) -> SearchRequest:
     """
@@ -119,11 +129,18 @@ def _validate_search_request(request: SearchRequest) -> SearchRequest:
     # Validate tbs (time-based search, if provided)
     if request.tbs is not None:
         valid_tbs_values = {
-            "qdr:d", "qdr:w", "qdr:m", "qdr:y",  # Google time filters
+            "qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y",  # Google time filters
             "d", "w", "m", "y"  # Short forms
         }
-        if request.tbs not in valid_tbs_values:
-            raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values}")
+
+        if request.tbs in valid_tbs_values:
+            pass  # Valid predefined value
+        elif request.tbs.startswith("cdr:"):
+            custom_date_pattern = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
+            if not re.match(custom_date_pattern, request.tbs):
+                raise ValueError(f"Invalid custom date range format: {request.tbs}. Expected format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
+        else:
+            raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values} or custom date range format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
 
     # Validate scrape_options (if provided)
     if request.scrape_options is not None:
@@ -166,4 +183,4 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
             data["scrapeOptions"] = scrape_data
         data.pop("scrape_options", None)
 
-    return data
+    return data
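The tbs handling above now accepts the hourly filter ("qdr:h") and a custom date range of the form cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY. Below is a small sketch of the accepted and rejected values, based on the validator in this hunk and the new unit tests; the import paths are assumptions mirroring how the unit tests appear to reach these symbols.

import pytest

# Assumed import paths (matching the wheel's module layout in RECORD):
from firecrawl.v2.types import SearchRequest
from firecrawl.v2.methods.search import _validate_search_request

# Newly accepted: hourly filter and a cdr:1 custom date range.
_validate_search_request(SearchRequest(query="firecrawl", tbs="qdr:h"))
_validate_search_request(
    SearchRequest(query="firecrawl", tbs="cdr:1,cd_min:1/1/2024,cd_max:12/31/2024")
)

# Still rejected: malformed ranges (here, missing cd_max) raise ValueError.
with pytest.raises(ValueError):
    _validate_search_request(SearchRequest(query="firecrawl", tbs="cdr:1,cd_min:2/28/2023"))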
firecrawl/v2/types.py CHANGED
@@ -327,11 +327,35 @@ class CrawlJob(BaseModel):
     next: Optional[str] = None
     data: List[Document] = []
 
-class SearchDocument(Document):
-    """A document from a search operation with URL and description."""
+class SearchResultWeb(BaseModel):
+    """A web search result with URL, title, and description."""
     url: str
     title: Optional[str] = None
-    description: Optional[str] = None
+    description: Optional[str] = None
+
+class SearchResultNews(BaseModel):
+    """A news search result with URL, title, snippet, date, image URL, and position."""
+    title: Optional[str] = None
+    url: Optional[str] = None
+    snippet: Optional[str] = None
+    date: Optional[str] = None
+    image_url: Optional[str] = None
+    position: Optional[int] = None
+
+class SearchResultImages(BaseModel):
+    """An image search result with URL, title, image URL, image width, image height, and position."""
+    title: Optional[str] = None
+    image_url: Optional[str] = None
+    image_width: Optional[int] = None
+    image_height: Optional[int] = None
+    url: Optional[str] = None
+    position: Optional[int] = None
+
+class SearchData(BaseModel):
+    """Search results grouped by source type."""
+    web: Optional[List[Union[SearchResultWeb, Document]]] = None
+    news: Optional[List[Union[SearchResultNews, Document]]] = None
+    images: Optional[List[Union[SearchResultImages, Document]]] = None
 
 class MapDocument(Document):
     """A document from a map operation with URL and description."""
@@ -535,9 +559,9 @@ SearchResult = LinkResult
 
 class SearchData(BaseModel):
     """Search results grouped by source type."""
-    web: Optional[List[Union[LinkResult, SearchDocument]]] = None
-    news: Optional[List[Union[LinkResult, SearchDocument]]] = None
-    images: Optional[List[Union[LinkResult, SearchDocument]]] = None
+    web: Optional[List[Union[SearchResultWeb, Document]]] = None
+    news: Optional[List[Union[SearchResultNews, Document]]] = None
+    images: Optional[List[Union[SearchResultImages, Document]]] = None
 
 class SearchResponse(BaseResponse[SearchData]):
     """Response from search operation."""
@@ -15,6 +15,7 @@ class AsyncHttpClient:
                 "Authorization": f"Bearer {api_key}",
                 "Content-Type": "application/json",
             },
+            limits=httpx.Limits(max_keepalive_connections=0),
         )
 
     async def close(self) -> None:
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: firecrawl
-Version: 3.1.0
+Version: 3.2.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
@@ -40,6 +40,10 @@ Requires-Dist: websockets
 Requires-Dist: nest-asyncio
 Requires-Dist: pydantic
 Requires-Dist: aiohttp
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
 
 # Firecrawl Python SDK
 
@@ -1,7 +1,7 @@
-firecrawl/__init__.py,sha256=OoUlrci9zB9_QHxXASXoaO90qLMv0B1ck7yUsRhX7gE,2191
+firecrawl/__init__.py,sha256=ork6Ayurc2D3XvO14vevIf--UPrptN81Ldxytzxa8ho,2192
 firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
 firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
-firecrawl/types.py,sha256=yZ4iza0M1T2kxNbt-tLEOKH7o6mFKZZ11VAZGodHSq4,2734
+firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
 firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
 firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
 firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=H9GtuwHIFdOQ958SOVThi_kvDDxcXAK_ECRh95ogonQ,3265
@@ -9,7 +9,7 @@ firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=cOssZvIwtghAtLiM1QdNLhPEwAxZ9j9u
 firecrawl/__tests__/e2e/v2/test_extract.py,sha256=HgvGiDlyWtFygiPo5EP44Dem1oWrwgRF-hfc1LfeVSU,1670
 firecrawl/__tests__/e2e/v2/test_map.py,sha256=9sT-Yq8V_8c9esl_bv5hnTA9WXb2Dg81kj6M-s0484c,1618
 firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=psW2nfcA_hMFpZ4msL_VJWJTMa3Sidp11ubhftbm52g,5759
-firecrawl/__tests__/e2e/v2/test_search.py,sha256=MN-q82gHlm5DT2HsnAQgW1NwVbgowlFYmKW1KGJd1ig,8811
+firecrawl/__tests__/e2e/v2/test_search.py,sha256=tvU9_eg_3H5em0fhIwPPjuYe9BRAQ5St-BLM0l_FfVs,9079
 firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
 firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
 firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=gJv_mLzzoAYftETB2TLkrpSfB5c04kaYgkD4hQTYsIg,2639
@@ -17,7 +17,7 @@ firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=X-nk5tkYUYIkM6kTYl7GDjvx
 firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=3CNRIFzgBMcOYOLhnKcK1k5a3Gy--u08EGDkL31uieM,1199
 firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=nckl1kbiEaaTdu5lm__tOoTDG-txTYwwSH3KZEvyKzc,1199
 firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=b17A7advBEjxrjdait2w8GHztZeKy_P3zZ3ixm5H7xw,4453
-firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=dnrRyTIzivlwe5wt5Wa0hdghZcJmNjC1l-XrAA_JZUU,7308
+firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=ehV0Ai_hknAkaoE551j2lbktV4bi_J0h3FKzC7G15Iw,8246
 firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpMjvYmeMXDFzbIhnCTMhE,1014
 firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
 firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
@@ -27,7 +27,7 @@ firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eM
 firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
 firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
 firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=14lUgFpQsiosgMKjDustBRVE0zXnHujBI76F8BC5PZ4,6072
-firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=PaV_kSgzjW8A3eFBCCn1-y4WFZBR2nf84NZk4UEBPX8,8275
+firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCpZbAPYjthfdPFWmAPcoApY-ED-khtuANs,9498
 firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
 firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
 firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
@@ -45,7 +45,7 @@ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
 firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
 firecrawl/v2/client.py,sha256=P6WAzwYGLLIANTrqAM-K4EUdGWQoFsi-zCjBibbxKQw,30507
 firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
-firecrawl/v2/types.py,sha256=zV0XAX_pJaJj41uxfJewKPANxd45BCL48nvbN_ybLOc,20222
+firecrawl/v2/types.py,sha256=bbHXPWJp6Kvjx9rKkTPyWZwdqVTErS4VYZKfHsb7ZQc,21137
 firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
 firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
 firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
@@ -53,7 +53,7 @@ firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8
 firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
 firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
 firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
-firecrawl/v2/methods/search.py,sha256=AdiaisKW4I5_Cjr_UQZYiRqDHu3nNrZqpfG4U2OhM6c,6131
+firecrawl/v2/methods/search.py,sha256=c6tkDQGYZeLsPABPVfzhjalsasnhlien3w80aoe89t0,7077
 firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
 firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
 firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
@@ -61,19 +61,19 @@ firecrawl/v2/methods/aio/crawl.py,sha256=pC6bHVk30Hj1EJdAChxpMOg0Xx_GVqq4tIlvU2e
 firecrawl/v2/methods/aio/extract.py,sha256=IfNr2ETqt4dR73JFzrEYI4kk5vpKnJOG0BmPEjGEoO4,4217
 firecrawl/v2/methods/aio/map.py,sha256=EuT-5A0cQr_e5SBfEZ6pnl8u0JUwEEvSwhyT2N-QoKU,2326
 firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
-firecrawl/v2/methods/aio/search.py,sha256=nuRmFCA_ymBw2tXJZ88vjZY-BueIRNonkSsrxExwusM,2501
+firecrawl/v2/methods/aio/search.py,sha256=_TqTFGQLlOCCLNdWcOvakTqPGD2r9AOlBg8RasOgmvw,6177
 firecrawl/v2/methods/aio/usage.py,sha256=OtBi6X-aT09MMR2dpm3vBCm9JrJZIJLCQ8jJ3L7vie4,1606
 firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
 firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
 firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
 firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg8mYA,4873
-firecrawl/v2/utils/http_client_async.py,sha256=P4XG6nTz6kKH3vCPTz6i7DRhbpK4IImRGaFvQFGBFRc,1874
+firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
 firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
 firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
+firecrawl-3.2.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
 tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
-firecrawl-3.1.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
-firecrawl-3.1.0.dist-info/METADATA,sha256=ObOWXXGQNP1FF8Y0wQK29tPBqgg9n5PecurI5c1MRC4,7305
-firecrawl-3.1.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-firecrawl-3.1.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
-firecrawl-3.1.0.dist-info/RECORD,,
+firecrawl-3.2.0.dist-info/METADATA,sha256=KNaWrAOA_ZRYnzqY-asiVcXstr7n_qeilUO8R2Gz1Os,7387
+firecrawl-3.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+firecrawl-3.2.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl-3.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.38.4)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 