firecrawl 3.0.3__tar.gz → 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl has been flagged as possibly problematic.

Files changed (87)
  1. {firecrawl-3.0.3 → firecrawl-3.2.0}/LICENSE +0 -0
  2. {firecrawl-3.0.3 → firecrawl-3.2.0}/PKG-INFO +6 -2
  3. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__init__.py +2 -2
  4. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -1
  5. firecrawl-3.2.0/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +248 -0
  6. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_search.py +10 -6
  7. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +47 -17
  8. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/client.py +1 -0
  9. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/types.py +6 -2
  10. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/crawl.py +2 -5
  11. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/scrape.py +2 -5
  12. firecrawl-3.2.0/firecrawl/v2/methods/aio/search.py +172 -0
  13. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/batch.py +2 -5
  14. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/crawl.py +2 -1
  15. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/scrape.py +2 -6
  16. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/search.py +65 -52
  17. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/types.py +98 -8
  18. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/http_client_async.py +1 -0
  19. firecrawl-3.2.0/firecrawl/v2/utils/normalize.py +107 -0
  20. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/watcher.py +4 -15
  21. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/watcher_async.py +2 -5
  22. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl.egg-info/PKG-INFO +6 -2
  23. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl.egg-info/SOURCES.txt +1 -0
  24. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl.egg-info/top_level.txt +1 -0
  25. {firecrawl-3.0.3 → firecrawl-3.2.0}/tests/test_change_tracking.py +0 -0
  26. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -183
  27. firecrawl-3.0.3/firecrawl/v2/methods/aio/search.py +0 -58
  28. {firecrawl-3.0.3 → firecrawl-3.2.0}/README.md +0 -0
  29. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -0
  30. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -0
  31. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -0
  32. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -0
  33. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -0
  34. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -0
  35. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/conftest.py +0 -0
  36. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_async.py +0 -0
  37. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -0
  38. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -0
  39. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_extract.py +0 -0
  40. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_map.py +0 -0
  41. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -0
  42. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_usage.py +0 -0
  43. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -0
  44. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -0
  45. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -0
  46. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -0
  47. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -0
  48. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -0
  49. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -0
  50. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -0
  51. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -0
  52. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -0
  53. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -0
  54. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -0
  55. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -0
  56. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -0
  57. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -0
  58. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -0
  59. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -0
  60. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -0
  61. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -0
  62. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -0
  63. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/firecrawl.backup.py +0 -0
  64. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v1/__init__.py +0 -0
  65. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v1/client.py +0 -0
  66. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/__init__.py +0 -0
  67. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/client.py +0 -0
  68. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/client_async.py +0 -0
  69. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/__init__.py +0 -0
  70. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/batch.py +0 -0
  71. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/extract.py +0 -0
  72. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/map.py +0 -0
  73. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/usage.py +0 -0
  74. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/extract.py +0 -0
  75. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/map.py +0 -0
  76. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/usage.py +0 -0
  77. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/__init__.py +0 -0
  78. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/error_handler.py +0 -0
  79. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/get_version.py +0 -0
  80. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/http_client.py +0 -0
  81. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/utils/validation.py +0 -0
  82. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl.egg-info/dependency_links.txt +0 -0
  83. {firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl.egg-info/requires.txt +0 -0
  84. {firecrawl-3.0.3 → firecrawl-3.2.0}/pyproject.toml +0 -0
  85. {firecrawl-3.0.3 → firecrawl-3.2.0}/setup.cfg +0 -0
  86. {firecrawl-3.0.3 → firecrawl-3.2.0}/setup.py +0 -0
  87. {firecrawl-3.0.3 → firecrawl-3.2.0}/tests/test_timeout_conversion.py +0 -0

{firecrawl-3.0.3 → firecrawl-3.2.0}/PKG-INFO (the same change appears in firecrawl.egg-info/PKG-INFO)

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: firecrawl
-Version: 3.0.3
+Version: 3.2.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
@@ -40,6 +40,10 @@ Requires-Dist: websockets
 Requires-Dist: nest-asyncio
 Requires-Dist: pydantic
 Requires-Dist: aiohttp
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python

 # Firecrawl Python SDK


{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__init__.py

@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )

-__version__ = "3.0.3"
+__version__ = "3.2.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -84,4 +84,4 @@ __all__ = [
     'V1JsonConfig',
     'V1ScrapeOptions',
     'V1ChangeTrackingOptions',
-]
+]
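
The bumped version is visible at runtime through the package's module-level `__version__` shown above. A trivial check, assuming the upgraded package is installed:

import firecrawl

# Reports the release of the installed SDK.
print(firecrawl.__version__)  # "3.2.0" after upgrading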

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py

@@ -96,7 +96,6 @@ async def test_async_get_crawl_status_shape():
     assert status.status in ("scraping", "completed", "failed")
     assert status.completed >= 0
     assert status.expires_at is not None
-    assert status.next is not None
     assert isinstance(status.data, list)



firecrawl-3.2.0/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py (new file)

@@ -0,0 +1,248 @@
+import os
+import pytest
+from dotenv import load_dotenv
+from firecrawl import AsyncFirecrawl
+from firecrawl.types import (
+    SearchData,
+    Document,
+    ScrapeOptions,
+    ScrapeFormats,
+    SearchResultWeb,
+    SearchResultNews,
+    SearchResultImages,
+)
+
+load_dotenv()
+
+firecrawl = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+
+def _collect_texts(entries):
+    texts = []
+    for r in entries or []:
+        title = getattr(r, 'title', None) if hasattr(r, 'title') else None
+        desc = getattr(r, 'description', None) if hasattr(r, 'description') else None
+        if title:
+            texts.append(str(title).lower())
+        if desc:
+            texts.append(str(desc).lower())
+    return texts
+
+def _is_document(entry) -> bool:
+    try:
+        from firecrawl.v2.types import Document
+        return isinstance(entry, Document) or \
+            hasattr(entry, 'markdown') or \
+            hasattr(entry, 'html') or \
+            hasattr(entry, 'raw_html') or \
+            hasattr(entry, 'json') or \
+            hasattr(entry, 'screenshot') or \
+            hasattr(entry, 'change_tracking') or \
+            hasattr(entry, 'summary')
+    except Exception:
+        return hasattr(entry, 'markdown') or \
+            hasattr(entry, 'html') or \
+            hasattr(entry, 'raw_html') or \
+            hasattr(entry, 'json') or \
+            hasattr(entry, 'screenshot') or \
+            hasattr(entry, 'change_tracking') or \
+            hasattr(entry, 'summary')
+
+@pytest.mark.asyncio
+async def test_async_search_minimal_request():
+    results = await firecrawl.search(
+        query="What is the capital of France?"
+    )
+    assert isinstance(results, SearchData)
+    assert hasattr(results, 'web')
+    assert results.web is not None
+    assert len(results.web) > 0
+    assert hasattr(results, 'news')
+    assert results.news is None
+    assert hasattr(results, 'images')
+    assert results.images is None
+
+    for result in results.web:
+        assert isinstance(result, SearchResultWeb)
+        assert hasattr(result, 'url')
+        assert hasattr(result, 'title')
+        assert hasattr(result, 'description')
+        assert result.url.startswith('http')
+        assert result.title is not None
+        assert result.description is not None
+
+    all_text = ' '.join(_collect_texts(results.web))
+    assert 'paris' in all_text
+
+    assert results.news is None
+    assert results.images is None
+
+@pytest.mark.asyncio
+async def test_async_search_with_sources():
+    results = await firecrawl.search(
+        query="firecrawl",
+        sources=["web", "news", "images"],
+        limit=3
+    )
+    assert isinstance(results, SearchData)
+    assert results.web is not None
+    assert len(results.web) <= 3
+    assert isinstance(results.web[0], SearchResultWeb)
+
+    if results.news is not None:
+        assert len(results.news) <= 3
+        assert isinstance(results.news[0], SearchResultNews)
+
+    if results.images is not None:
+        assert len(results.images) <= 3
+        assert isinstance(results.images[0], SearchResultImages)
+
+    web_titles = [result.title.lower() for result in results.web]
+    web_descriptions = [result.description.lower() for result in results.web]
+    all_web_text = ' '.join(web_titles + web_descriptions)
+    assert 'firecrawl' in all_web_text
+
+@pytest.mark.asyncio
+async def test_async_search_result_structure():
+    results = await firecrawl.search(
+        query="test query",
+        limit=1
+    )
+    if results.web and len(results.web) > 0:
+        result = results.web[0]
+        assert hasattr(result, 'url')
+        assert hasattr(result, 'title')
+        assert hasattr(result, 'description')
+        assert isinstance(result.url, str)
+        assert isinstance(result.title, str) or result.title is None
+        assert isinstance(result.description, str) or result.description is None
+        assert result.url.startswith('http')
+
+@pytest.mark.asyncio
+async def test_async_search_all_parameters():
+    from firecrawl.types import ScrapeOptions, Location, WaitAction
+    schema = {
+        "type": "object",
+        "properties": {
+            "title": {"type": "string"},
+            "description": {"type": "string"},
+            "url": {"type": "string"}
+        },
+        "required": ["title", "description"]
+    }
+    results = await firecrawl.search(
+        query="artificial intelligence",
+        sources=[
+            {"type": "web"},
+            {"type": "news"}
+        ],
+        limit=3,
+        tbs="qdr:m",
+        location="US",
+        ignore_invalid_urls=True,
+        timeout=60000,
+        scrape_options=ScrapeOptions(
+            formats=[
+                "markdown",
+                "html",
+                {
+                    "type": "json",
+                    "prompt": "Extract the title and description from the page",
+                    "schema": schema
+                },
+                {"type": "summary"}
+            ],
+            headers={"User-Agent": "Firecrawl-Test/1.0"},
+            include_tags=["h1", "h2", "p"],
+            exclude_tags=["nav", "footer"],
+            only_main_content=True,
+            wait_for=2000,
+            mobile=False,
+            skip_tls_verification=False,
+            remove_base64_images=True,
+            block_ads=True,
+            proxy="basic",
+            max_age=3600000,
+            store_in_cache=True,
+            location=Location(
+                country="US",
+                languages=["en"]
+            ),
+            actions=[
+                WaitAction(milliseconds=1000)
+            ]
+        )
+    )
+    assert isinstance(results, SearchData)
+    assert hasattr(results, 'web')
+    assert hasattr(results, 'news')
+    assert hasattr(results, 'images')
+    assert results.web is not None
+    assert len(results.web) <= 3
+
+    non_doc_entries = [r for r in (results.web or []) if not _is_document(r)]
+    if non_doc_entries:
+        all_web_text = ' '.join(_collect_texts(non_doc_entries))
+        ai_terms = ['artificial', 'intelligence', 'ai', 'machine', 'learning']
+        assert any(term in all_web_text for term in ai_terms)
+
+    for result in results.web:
+        assert isinstance(result, (SearchResultWeb, Document))
+        if isinstance(result, Document):
+            assert (result.markdown is not None) or (result.html is not None)
+        else:
+            assert hasattr(result, 'url')
+            assert isinstance(result.url, str) and result.url.startswith('http')
+
+    if results.news is not None:
+        assert len(results.news) <= 3
+        for result in results.news:
+            assert isinstance(result, (SearchResultNews, Document))
+            if isinstance(result, Document):
+                assert (result.markdown is not None) or (result.html is not None)
+            else:
+                assert hasattr(result, 'url')
+                assert isinstance(result.url, str) and result.url.startswith('http')
+
+    assert results.images is None
+
+@pytest.mark.asyncio
+async def test_async_search_formats_flexibility():
+    # Test with list format
+    results1 = await firecrawl.search(
+        query="python programming",
+        limit=1,
+        scrape_options=ScrapeOptions(
+            formats=["markdown"]
+        )
+    )
+    # Test with ScrapeFormats object
+    results2 = await firecrawl.search(
+        query="python programming",
+        limit=1,
+        scrape_options=ScrapeOptions(
+            formats=ScrapeFormats(markdown=True)
+        )
+    )
+    assert isinstance(results1, SearchData)
+    assert isinstance(results2, SearchData)
+    assert results1.web is not None
+    assert results2.web is not None
+
+@pytest.mark.asyncio
+async def test_async_search_with_json_format_object():
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "title": {"type": "string"}
+        },
+        "required": ["title"],
+    }
+    results = await firecrawl.search(
+        query="site:docs.firecrawl.dev",
+        limit=1,
+        scrape_options=ScrapeOptions(
+            formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}]
+        ),
+    )
+    assert isinstance(results, SearchData)
+    assert results.web is not None and len(results.web) >= 0

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/e2e/v2/test_search.py

@@ -1,7 +1,7 @@
 from firecrawl import Firecrawl
 import os
 from dotenv import load_dotenv
-from firecrawl.types import SearchData, SearchResult, Document, ScrapeFormats, ScrapeOptions
+from firecrawl.types import SearchData, Document, ScrapeOptions, SearchResultWeb, SearchResultNews, SearchResultImages

 load_dotenv()

@@ -53,7 +53,7 @@ def test_search_minimal_request():
     assert results.images is None

     for result in results.web:
-        assert isinstance(result, SearchResult)
+        assert isinstance(result, SearchResultWeb)
         assert hasattr(result, 'url')
         assert hasattr(result, 'title')
         assert hasattr(result, 'description')
@@ -73,7 +73,7 @@ def test_search_with_sources():
     """Test search with specific sources."""
     results = firecrawl.search(
         query="firecrawl",
-        sources=["web", "news"],
+        sources=["web", "news", "images"],
         limit=3
     )

@@ -81,11 +81,15 @@ def test_search_with_sources():

     assert results.web is not None
     assert len(results.web) <= 3
+    assert isinstance(results.web[0], SearchResultWeb)

     if results.news is not None:
         assert len(results.news) <= 3
+        assert isinstance(results.news[0], SearchResultNews)

-    assert results.images is None
+    if results.images is not None:
+        assert len(results.images) <= 3
+        assert isinstance(results.images[0], SearchResultImages)

     web_titles = [result.title.lower() for result in results.web]
     web_descriptions = [result.description.lower() for result in results.web]
@@ -193,7 +197,7 @@ def test_search_all_parameters():

     # Test that each result has proper structure
     for result in results.web:
-        assert isinstance(result, (SearchResult, Document))
+        assert isinstance(result, (SearchResultWeb, Document))
         if isinstance(result, Document):
             # Document path: ensure content present
             assert (result.markdown is not None) or (result.html is not None)
@@ -206,7 +210,7 @@ def test_search_all_parameters():
     if results.news is not None:
         assert len(results.news) <= 3
         for result in results.news:
-            assert isinstance(result, (SearchResult, Document))
+            assert isinstance(result, (SearchResultNews, Document))
             if isinstance(result, Document):
                 assert (result.markdown is not None) or (result.html is not None)
             else:

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py

@@ -11,7 +11,7 @@ class TestSearchValidation:
         request = SearchRequest(query="")
         with pytest.raises(ValueError, match="Query cannot be empty"):
            _validate_search_request(request)
-
+
         request = SearchRequest(query=" ")
         with pytest.raises(ValueError, match="Query cannot be empty"):
             _validate_search_request(request)
@@ -22,12 +22,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=0)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Negative limit
         request = SearchRequest(query="test", limit=-1)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Too high limit
         request = SearchRequest(query="test", limit=101)
         with pytest.raises(ValueError, match="Limit cannot exceed 100"):
@@ -39,12 +39,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", timeout=0)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Negative timeout
         request = SearchRequest(query="test", timeout=-1000)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Too high timeout
         request = SearchRequest(query="test", timeout=300001)
         with pytest.raises(ValueError, match="Timeout cannot exceed 300000ms"):
@@ -56,12 +56,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", sources=["invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Invalid object source
         request = SearchRequest(query="test", sources=[Source(type="invalid_source")])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Mixed valid/invalid sources
         request = SearchRequest(query="test", sources=["web", "invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
@@ -73,7 +73,7 @@ class TestSearchValidation:
         request = SearchRequest(query="test", location="")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
             _validate_search_request(request)
-
+
         # Whitespace location
         request = SearchRequest(query="test", location=" ")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
@@ -82,19 +82,49 @@ class TestSearchValidation:
     def test_validate_invalid_tbs(self):
         """Test validation of invalid tbs values."""
         invalid_tbs_values = ["invalid", "qdr:x", "yesterday", "last_week"]
-
+
         for invalid_tbs in invalid_tbs_values:
             request = SearchRequest(query="test", tbs=invalid_tbs)
             with pytest.raises(ValueError, match="Invalid tbs value"):
                 _validate_search_request(request)

+    def test_validate_custom_date_ranges(self):
+        """Test validation of custom date range formats."""
+        valid_custom_ranges = [
+            "cdr:1,cd_min:1/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:12/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:2/28/2023,cd_max:3/1/2023",
+            "cdr:1,cd_min:10/15/2023,cd_max:11/15/2023"
+        ]
+
+        for valid_range in valid_custom_ranges:
+            request = SearchRequest(query="test", tbs=valid_range)
+            validated = _validate_search_request(request)
+            assert validated == request
+
+    def test_validate_invalid_custom_date_ranges(self):
+        """Test validation of invalid custom date range formats."""
+        # Invalid custom date ranges
+        invalid_custom_ranges = [
+            "cdr:1,cd_min:2/28/2023",  # Missing cd_max
+            "cdr:1,cd_max:2/28/2023",  # Missing cd_min
+            "cdr:2,cd_min:1/1/2024,cd_max:12/31/2024",  # Wrong cdr value
+            "cdr:cd_min:1/1/2024,cd_max:12/31/2024",  # Missing :1
+            "custom:1,cd_min:1/1/2024,cd_max:12/31/2024"  # Wrong prefix
+        ]
+
+        for invalid_range in invalid_custom_ranges:
+            request = SearchRequest(query="test", tbs=invalid_range)
+            with pytest.raises(ValueError, match="Invalid"):
+                _validate_search_request(request)
+
     def test_validate_valid_requests(self):
         """Test that valid requests pass validation."""
         # Minimal valid request
         request = SearchRequest(query="test")
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with all optional parameters
         request = SearchRequest(
             query="test query",
@@ -107,7 +137,7 @@ class TestSearchValidation:
         )
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with object sources
         request = SearchRequest(
             query="test",
@@ -122,17 +152,17 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=100)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Maximum valid timeout
         request = SearchRequest(query="test", timeout=300000)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid limit
         request = SearchRequest(query="test", limit=1)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid timeout
         request = SearchRequest(query="test", timeout=1)
         validated = _validate_search_request(request)
@@ -191,16 +221,16 @@ class TestSearchRequestModel:
         data1 = request1.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data1  # No alias, uses snake_case
         assert data1["ignore_invalid_urls"] is None
-
+
         # Test with explicit False value
         request2 = SearchRequest(
             query="test",
             ignore_invalid_urls=False,
             scrape_options=ScrapeOptions(formats=["markdown"])
         )
-
+
         # Check that aliases are used in model_dump with by_alias=True
         data2 = request2.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data2  # No alias, uses snake_case
         assert "scrape_options" in data2  # No alias, uses snake_case
-        assert data2["ignore_invalid_urls"] is False
+        assert data2["ignore_invalid_urls"] is False
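
The new tests accept Google-style custom ranges ("cdr:1,cd_min:M/D/YYYY,cd_max:M/D/YYYY") alongside the existing qdr presets. The validator itself lives in firecrawl/v2/methods/search.py, whose body is not part of this excerpt; the sketch below is only an inferred check that is consistent with the valid and invalid examples above, not the SDK's actual implementation.

import re

# Inferred sketch (not the SDK's code): accept qdr presets plus a strict
# "cdr:1,cd_min:M/D/YYYY,cd_max:M/D/YYYY" shape, reject everything else.
_TBS_PRESETS = {"qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y"}
_CUSTOM_RANGE = re.compile(
    r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
)

def _is_valid_tbs(tbs: str) -> bool:
    return tbs in _TBS_PRESETS or bool(_CUSTOM_RANGE.match(tbs))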

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/client.py

@@ -25,6 +25,7 @@ import logging
 from .v1 import V1FirecrawlApp, AsyncV1FirecrawlApp
 from .v2 import FirecrawlClient as V2FirecrawlClient
 from .v2.client_async import AsyncFirecrawlClient
+from .v2.types import Document

 logger = logging.getLogger("firecrawl")


{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/types.py

@@ -48,7 +48,9 @@ from .v2.types import (
     JsonFormat,
     FormatOption,
     SearchRequest,
-    SearchResult,
+    SearchResultWeb,
+    SearchResultNews,
+    SearchResultImages,
     SearchData,
     SearchResponse,

@@ -124,7 +126,9 @@ __all__ = [
     'JsonFormat',
     'FormatOption',
     'SearchRequest',
-    'SearchResult',
+    'SearchResultWeb',
+    'SearchResultNews',
+    'SearchResultImages',
     'SearchData',
     'SearchResponse',
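
The single `SearchResult` export is replaced by source-specific result models. A minimal usage sketch of the new surface, mirroring the synchronous test code above (the API key and query here are placeholders, and which buckets are populated depends on the sources requested):

from firecrawl import Firecrawl
from firecrawl.types import SearchResultWeb, SearchResultNews, SearchResultImages

client = Firecrawl(api_key="fc-...")  # placeholder key
results = client.search(query="firecrawl", sources=["web", "news", "images"], limit=3)

# Each source now has its own result type and its own bucket on SearchData.
for web_hit in results.web or []:
    assert isinstance(web_hit, SearchResultWeb)
    print(web_hit.url, web_hit.title)

for news_hit in results.news or []:
    assert isinstance(news_hit, SearchResultNews)

for image_hit in results.images or []:
    assert isinstance(image_hit, SearchResultImages)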
 

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/crawl.py

@@ -14,6 +14,7 @@ from ...types import (
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
+from ...utils.normalize import normalize_document_input


 def _prepare_crawl_request(request: CrawlRequest) -> dict:
@@ -76,11 +77,7 @@ async def get_crawl_status(client: AsyncHttpClient, job_id: str) -> CrawlJob:
     documents = []
     for doc_data in body.get("data", []):
         if isinstance(doc_data, dict):
-            normalized = dict(doc_data)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc_data)
             documents.append(Document(**normalized))
     return CrawlJob(
         status=body.get("status"),

{firecrawl-3.0.3 → firecrawl-3.2.0}/firecrawl/v2/methods/aio/scrape.py

@@ -1,5 +1,6 @@
 from typing import Optional, Dict, Any
 from ...types import ScrapeOptions, Document
+from ...utils.normalize import normalize_document_input
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options, validate_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
@@ -27,10 +28,6 @@ async def scrape(client: AsyncHttpClient, url: str, options: Optional[ScrapeOpti
     if not body.get("success"):
         raise Exception(body.get("error", "Unknown error occurred"))
     document_data = body.get("data", {})
-    normalized = dict(document_data)
-    if 'rawHtml' in normalized and 'raw_html' not in normalized:
-        normalized['raw_html'] = normalized.pop('rawHtml')
-    if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-        normalized['change_tracking'] = normalized.pop('changeTracking')
+    normalized = normalize_document_input(document_data)
     return Document(**normalized)

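
Both async methods now delegate camelCase-to-snake_case key normalization to the new firecrawl/v2/utils/normalize.py (107 added lines, not shown in this excerpt). Judging only from the inline logic removed above, the helper presumably covers at least the mapping sketched below; the real module likely handles more keys than this.

from typing import Any, Dict

# Sketch inferred from the inline code removed above, not the actual module.
_KEY_MAP = {
    "rawHtml": "raw_html",
    "changeTracking": "change_tracking",
}

def normalize_document_input(doc_data: Dict[str, Any]) -> Dict[str, Any]:
    normalized = dict(doc_data)
    for camel, snake in _KEY_MAP.items():
        # Only rename when the snake_case key is not already present.
        if camel in normalized and snake not in normalized:
            normalized[snake] = normalized.pop(camel)
    return normalized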