firecrawl-py 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl-py might be problematic.

Files changed (84)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/__tests__/e2e/v2/test_scrape.py +37 -1
  3. firecrawl/client.py +8 -4
  4. firecrawl/v2/types.py +19 -2
  5. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/METADATA +7 -3
  6. firecrawl_py-3.3.3.dist-info/RECORD +79 -0
  7. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/WHEEL +1 -1
  8. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info/licenses}/LICENSE +0 -0
  9. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/top_level.txt +0 -2
  10. build/lib/firecrawl/__init__.py +0 -87
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  15. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  16. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  17. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  18. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  19. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  20. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  21. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  22. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  23. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  24. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  25. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  26. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  27. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  28. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  33. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  34. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  35. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  36. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  43. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  44. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  45. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  46. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  47. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  48. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  49. build/lib/firecrawl/client.py +0 -242
  50. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  51. build/lib/firecrawl/types.py +0 -161
  52. build/lib/firecrawl/v1/__init__.py +0 -14
  53. build/lib/firecrawl/v1/client.py +0 -4653
  54. build/lib/firecrawl/v2/__init__.py +0 -4
  55. build/lib/firecrawl/v2/client.py +0 -805
  56. build/lib/firecrawl/v2/client_async.py +0 -250
  57. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  58. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  59. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  60. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  61. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  62. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  63. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  64. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  65. build/lib/firecrawl/v2/methods/batch.py +0 -417
  66. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  67. build/lib/firecrawl/v2/methods/extract.py +0 -131
  68. build/lib/firecrawl/v2/methods/map.py +0 -77
  69. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  70. build/lib/firecrawl/v2/methods/search.py +0 -197
  71. build/lib/firecrawl/v2/methods/usage.py +0 -41
  72. build/lib/firecrawl/v2/types.py +0 -665
  73. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  74. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  75. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  76. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  77. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  78. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  79. build/lib/firecrawl/v2/utils/validation.py +0 -324
  80. build/lib/firecrawl/v2/watcher.py +0 -301
  81. build/lib/firecrawl/v2/watcher_async.py +0 -242
  82. build/lib/tests/test_change_tracking.py +0 -98
  83. build/lib/tests/test_timeout_conversion.py +0 -117
  84. firecrawl_py-3.3.1.dist-info/RECORD +0 -153
firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
      V1ChangeTrackingOptions,
  )

- __version__ = "3.3.1"
+ __version__ = "3.3.3"

  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/__tests__/e2e/v2/test_scrape.py CHANGED
@@ -151,4 +151,40 @@ class TestScrapeE2E:
              max_age=0,
              store_in_cache=False,
          )
-         assert isinstance(doc, Document)
+         assert isinstance(doc, Document)
+
+     def test_scrape_images_format(self):
+         """Test images format extraction."""
+         doc = self.client.scrape(
+             "https://firecrawl.dev",
+             formats=["images"]
+         )
+         assert isinstance(doc, Document)
+         assert doc.images is not None
+         assert isinstance(doc.images, list)
+         assert len(doc.images) > 0
+         # Should find firecrawl logo/branding images
+         assert any("firecrawl" in img.lower() or "logo" in img.lower() for img in doc.images)
+
+     def test_scrape_images_with_multiple_formats(self):
+         """Test images format works with other formats."""
+         doc = self.client.scrape(
+             "https://github.com",
+             formats=["markdown", "links", "images"]
+         )
+         assert isinstance(doc, Document)
+         assert doc.markdown is not None
+         assert doc.links is not None
+         assert doc.images is not None
+         assert isinstance(doc.images, list)
+         assert len(doc.images) > 0
+
+         # Images should find content not available in links format
+         image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico']
+         link_images = [
+             link for link in (doc.links or [])
+             if any(ext in link.lower() for ext in image_extensions)
+         ]
+
+         # Should discover additional images beyond those with obvious extensions
+         assert len(doc.images) >= len(link_images)
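Outside the test class, the same feature reduces to a short top-level call. A minimal sketch, assuming FIRECRAWL_API_KEY is set in the environment (the target URL is illustrative):

import os

from firecrawl import Firecrawl

client = Firecrawl(api_key=os.getenv("FIRECRAWL_API_KEY"))

# "images" is the new format string added in this release; the result lands
# on the new Document.images field (Optional[List[str]], see types.py below).
doc = client.scrape("https://firecrawl.dev", formats=["images"])
for image_url in doc.images or []:
    print(image_url)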
firecrawl/client.py CHANGED
@@ -56,7 +56,6 @@ class V2Proxy:
          self._client = client_instance

          if client_instance:
-             # self.scrape = client_instance.scrape
              self.search = client_instance.search
              self.crawl = client_instance.crawl
              self.get_crawl_status = client_instance.get_crawl_status
@@ -168,14 +167,17 @@ class Firecrawl:
          self.v1 = V1Proxy(self._v1_client) if self._v1_client else None
          self.v2 = V2Proxy(self._v2_client)

-
          self.scrape = self._v2_client.scrape
+         self.search = self._v2_client.search
+         self.map = self._v2_client.map
+
          self.crawl = self._v2_client.crawl
          self.start_crawl = self._v2_client.start_crawl
          self.crawl_params_preview = self._v2_client.crawl_params_preview
          self.get_crawl_status = self._v2_client.get_crawl_status
          self.cancel_crawl = self._v2_client.cancel_crawl
          self.get_crawl_errors = self._v2_client.get_crawl_errors
+         self.get_active_crawls = self._v2_client.get_active_crawls
          self.active_crawls = self._v2_client.active_crawls

          self.start_batch_scrape = self._v2_client.start_batch_scrape
@@ -183,13 +185,15 @@
          self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
          self.batch_scrape = self._v2_client.batch_scrape
          self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors
+
+         self.start_extract = self._v2_client.start_extract
          self.get_extract_status = self._v2_client.get_extract_status
-         self.map = self._v2_client.map
-         self.search = self._v2_client.search
          self.extract = self._v2_client.extract
+
          self.get_concurrency = self._v2_client.get_concurrency
          self.get_credit_usage = self._v2_client.get_credit_usage
          self.get_token_usage = self._v2_client.get_token_usage
+
          self.watcher = self._v2_client.watcher

  class AsyncFirecrawl:
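The rebinding above mostly reorders existing aliases; the one new capability is get_active_crawls. A minimal sketch of calling it on the top-level client (the key handling is illustrative, and the response shape is an assumption mirroring the active_crawls e2e tests removed further down):

import os

from firecrawl import Firecrawl

client = Firecrawl(api_key=os.getenv("FIRECRAWL_API_KEY"))

# Newly exposed binding alongside the existing active_crawls.
active = client.get_active_crawls()

# Assumed shape: the async e2e tests below check for `success` and `crawls`.
for crawl in active.crawls:
    print(crawl.id)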
firecrawl/v2/types.py CHANGED
@@ -114,6 +114,12 @@ class DocumentMetadata(BaseModel):
      def coerce_status_code_to_int(cls, v):
          return cls._coerce_string_to_int(v)

+ class AttributeResult(BaseModel):
+     """Result of attribute extraction."""
+     selector: str
+     attribute: str
+     values: List[str]
+
  class Document(BaseModel):
      """A scraped document."""
      markdown: Optional[str] = None
@@ -123,6 +129,7 @@ class Document(BaseModel):
      summary: Optional[str] = None
      metadata: Optional[DocumentMetadata] = None
      links: Optional[List[str]] = None
+     images: Optional[List[str]] = None
      screenshot: Optional[str] = None
      actions: Optional[Dict[str, Any]] = None
      warning: Optional[str] = None
@@ -182,7 +189,7 @@ CategoryOption = Union[str, Category]

  FormatString = Literal[
      # camelCase versions (API format)
-     "markdown", "html", "rawHtml", "links", "screenshot", "summary", "changeTracking", "json",
+     "markdown", "html", "rawHtml", "links", "images", "screenshot", "summary", "changeTracking", "json", "attributes",
      # snake_case versions (user-friendly)
      "raw_html", "change_tracking"
  ]
@@ -214,9 +221,18 @@ class ScreenshotFormat(BaseModel):
      full_page: Optional[bool] = None
      quality: Optional[int] = None
      viewport: Optional[Union[Dict[str, int], Viewport]] = None
+
+ class AttributeSelector(BaseModel):
+     """Selector and attribute pair for attribute extraction."""
+     selector: str
+     attribute: str

- FormatOption = Union[Dict[str, Any], FormatString, JsonFormat, ChangeTrackingFormat, ScreenshotFormat, Format]
+ class AttributesFormat(Format):
+     """Configuration for attribute extraction."""
+     type: Literal["attributes"] = "attributes"
+     selectors: List[AttributeSelector]

+ FormatOption = Union[Dict[str, Any], FormatString, JsonFormat, ChangeTrackingFormat, ScreenshotFormat, AttributesFormat, Format]
  # Scrape types
  class ScrapeFormats(BaseModel):
      """Output formats for scraping."""
@@ -226,6 +242,7 @@ class ScrapeFormats(BaseModel):
      raw_html: bool = False
      summary: bool = False
      links: bool = False
+     images: bool = False
      screenshot: bool = False
      change_tracking: bool = False
      json: bool = False
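No test in this diff exercises the new attributes format end to end, so the following is a hypothetical sketch built only from the models above; where attribute results land on the response object is not shown in this diff:

from firecrawl import Firecrawl
from firecrawl.v2.types import AttributeSelector, AttributesFormat

client = Firecrawl(api_key="fc-...")  # placeholder key

# AttributesFormat now sits in the FormatOption union, so it can be mixed
# with plain format strings in the same request.
attributes = AttributesFormat(selectors=[
    AttributeSelector(selector="a", attribute="href"),
    AttributeSelector(selector="img", attribute="src"),
])
doc = client.scrape("https://firecrawl.dev", formats=["markdown", attributes])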
{firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: firecrawl-py
- Version: 3.3.1
+ Version: 3.3.3
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/firecrawl/firecrawl
  Author: Mendable.ai
@@ -38,8 +38,12 @@ Requires-Dist: httpx
  Requires-Dist: python-dotenv
  Requires-Dist: websockets
  Requires-Dist: nest-asyncio
- Requires-Dist: pydantic (>=2.0)
+ Requires-Dist: pydantic>=2.0
  Requires-Dist: aiohttp
+ Dynamic: author
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-python

  # Firecrawl Python SDK

firecrawl_py-3.3.3.dist-info/RECORD ADDED
@@ -0,0 +1,79 @@
+ firecrawl/__init__.py,sha256=1MYT5_7-p8sfruL_5y1m1n9AoWG_6aNduWGW4NId86M,2192
+ firecrawl/client.py,sha256=tp3mUo_3aGPuZ53kpU4bhM-5EtwD_IUWrJ7wm0GMuCc,11159
+ firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
+ firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
+ firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
+ firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
+ firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=H9GtuwHIFdOQ958SOVThi_kvDDxcXAK_ECRh95ogonQ,3265
+ firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=cOssZvIwtghAtLiM1QdNLhPEwAxZ9j9umTrBUPtJjpU,9951
+ firecrawl/__tests__/e2e/v2/test_extract.py,sha256=HgvGiDlyWtFygiPo5EP44Dem1oWrwgRF-hfc1LfeVSU,1670
+ firecrawl/__tests__/e2e/v2/test_map.py,sha256=9sT-Yq8V_8c9esl_bv5hnTA9WXb2Dg81kj6M-s0484c,1618
+ firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=oyroF_WaEdxgD8t_SHkLBBfDRv1_6xZ_7vSTQpwlmA8,7198
+ firecrawl/__tests__/e2e/v2/test_search.py,sha256=tvU9_eg_3H5em0fhIwPPjuYe9BRAQ5St-BLM0l_FfVs,9079
+ firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
+ firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
+ firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=gJv_mLzzoAYftETB2TLkrpSfB5c04kaYgkD4hQTYsIg,2639
+ firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=X-nk5tkYUYIkM6kTYl7GDjvxh2JT9GxJqk2KlO8xpWw,7282
+ firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=3CNRIFzgBMcOYOLhnKcK1k5a3Gy--u08EGDkL31uieM,1199
+ firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=nckl1kbiEaaTdu5lm__tOoTDG-txTYwwSH3KZEvyKzc,1199
+ firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=b17A7advBEjxrjdait2w8GHztZeKy_P3zZ3ixm5H7xw,4453
+ firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=ehV0Ai_hknAkaoE551j2lbktV4bi_J0h3FKzC7G15Iw,8246
+ firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpMjvYmeMXDFzbIhnCTMhE,1014
+ firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
+ firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
+ firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
+ firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
+ firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
+ firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
+ firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
+ firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=14lUgFpQsiosgMKjDustBRVE0zXnHujBI76F8BC5PZ4,6072
+ firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCpZbAPYjthfdPFWmAPcoApY-ED-khtuANs,9498
+ firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
+ firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=RkIKt7uxBzVhAkrLQwXYjmC-9sj32SUNQrJZgF2WEMs,2565
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=PdUJrR0JLWqrithAnRXwuRrnsIN2h_DTu6-xvTOn_UU,725
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=hFk4XgqF3aFPGFJe0ikB1uwf_0FsppNGA088OrWUXvg,2091
+ firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py,sha256=E26UnUhpbjG-EG0ab4WRD94AxA5IBWmIHq8ZLBOWoAA,1202
+ firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py,sha256=pUwuWhRbVUTbgsZn4hgZesMkTMesTv_NPmvFW--ls-Y,3815
+ firecrawl/__tests__/unit/v2/utils/test_validation.py,sha256=E4n4jpBhH_W7E0ikI5r8KMAKiOhbfGD3i_B8-dv3PlI,10803
+ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4-4rw1-xVKWmLg2BOBGxjQPnUk,9517
+ firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
+ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
+ firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
+ firecrawl/v2/client.py,sha256=_DZFZO1aWvODzznK0g2Svcd2-xxXgWGR0d9vniNlk1w,30621
+ firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
+ firecrawl/v2/types.py,sha256=F-RCADQFdpAmF5t8LUabLOgyIV02Ol34yNa9y3S3ZMg,22667
+ firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
+ firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
+ firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
+ firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8,15485
+ firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
+ firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
+ firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
+ firecrawl/v2/methods/search.py,sha256=6BKiQ1aKJjWBKm9BBtKxFKGD74kCKBeMIp_OgjcDFAw,7673
+ firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
+ firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
+ firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
+ firecrawl/v2/methods/aio/crawl.py,sha256=pC6bHVk30Hj1EJdAChxpMOg0Xx_GVqq4tIlvU2e5RQ4,6688
+ firecrawl/v2/methods/aio/extract.py,sha256=IfNr2ETqt4dR73JFzrEYI4kk5vpKnJOG0BmPEjGEoO4,4217
+ firecrawl/v2/methods/aio/map.py,sha256=EuT-5A0cQr_e5SBfEZ6pnl8u0JUwEEvSwhyT2N-QoKU,2326
+ firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
+ firecrawl/v2/methods/aio/search.py,sha256=_TqTFGQLlOCCLNdWcOvakTqPGD2r9AOlBg8RasOgmvw,6177
+ firecrawl/v2/methods/aio/usage.py,sha256=OtBi6X-aT09MMR2dpm3vBCm9JrJZIJLCQ8jJ3L7vie4,1606
+ firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
+ firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
+ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
+ firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg8mYA,4873
+ firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
+ firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
+ firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
+ firecrawl_py-3.3.3.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+ tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
+ firecrawl_py-3.3.3.dist-info/METADATA,sha256=_5tGMWJrCEIJy1UCLKbPAgV6iczF2_T4aRpLhpoL0F4,7395
+ firecrawl_py-3.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ firecrawl_py-3.3.3.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+ firecrawl_py-3.3.3.dist-info/RECORD,,
{firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.38.4)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

{firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/top_level.txt CHANGED
@@ -1,4 +1,2 @@
- build
- dist
  firecrawl
  tests
build/lib/firecrawl/__init__.py DELETED
@@ -1,87 +0,0 @@
- """
- Firecrawl Python SDK
-
- """
-
- import logging
- import os
-
- from .client import Firecrawl, AsyncFirecrawl, FirecrawlApp, AsyncFirecrawlApp
- from .v2.watcher import Watcher
- from .v2.watcher_async import AsyncWatcher
- from .v1 import (
-     V1FirecrawlApp,
-     AsyncV1FirecrawlApp,
-     V1JsonConfig,
-     V1ScrapeOptions,
-     V1ChangeTrackingOptions,
- )
-
- __version__ = "3.3.1"
-
- # Define the logger for the Firecrawl project
- logger: logging.Logger = logging.getLogger("firecrawl")
-
-
- def _configure_logger() -> None:
-     """
-     Configure the firecrawl logger for console output.
-
-     The function attaches a handler for console output with a specific format and date
-     format to the firecrawl logger.
-     """
-     try:
-         formatter = logging.Formatter(
-             "[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
-             datefmt="%Y-%m-%d %H:%M:%S",
-         )
-
-         console_handler = logging.StreamHandler()
-         console_handler.setFormatter(formatter)
-
-         logger.addHandler(console_handler)
-     except Exception as e:
-         logger.error("Failed to configure logging: %s", e)
-
-
- def setup_logging() -> None:
-     """Set up logging based on the FIRECRAWL_LOGGING_LEVEL environment variable."""
-     if logger.hasHandlers():
-         return
-
-     if not (env := os.getenv("FIRECRAWL_LOGGING_LEVEL", "").upper()):
-         logger.addHandler(logging.NullHandler())
-         return
-
-     _configure_logger()
-
-     if env == "DEBUG":
-         logger.setLevel(logging.DEBUG)
-     elif env == "INFO":
-         logger.setLevel(logging.INFO)
-     elif env == "WARNING":
-         logger.setLevel(logging.WARNING)
-     elif env == "ERROR":
-         logger.setLevel(logging.ERROR)
-     elif env == "CRITICAL":
-         logger.setLevel(logging.CRITICAL)
-     else:
-         logger.setLevel(logging.INFO)
-         logger.warning("Unknown logging level: %s, defaulting to INFO", env)
-
- setup_logging()
- logger.debug("Debugging logger setup")
-
- __all__ = [
-     'Firecrawl',
-     'AsyncFirecrawl',
-     'FirecrawlApp',
-     'AsyncFirecrawlApp',
-     'Watcher',
-     'AsyncWatcher',
-     'V1FirecrawlApp',
-     'AsyncV1FirecrawlApp',
-     'V1JsonConfig',
-     'V1ScrapeOptions',
-     'V1ChangeTrackingOptions',
- ]
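This deletion only removes a stale build/ copy; the same logging setup remains in the live firecrawl/__init__.py, whose diff above changed nothing but the version string. For reference, enabling it is just an environment variable read at import time:

import os

# Must be set before the first `import firecrawl`, since setup_logging()
# runs at module import and reads FIRECRAWL_LOGGING_LEVEL once.
os.environ["FIRECRAWL_LOGGING_LEVEL"] = "DEBUG"

import firecrawl  # the "firecrawl" logger now emits DEBUG output to the console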
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py DELETED
@@ -1,79 +0,0 @@
- import os
- import asyncio
- import pytest
- from dotenv import load_dotenv
- from firecrawl import AsyncFirecrawl
-
-
- load_dotenv()
-
- if not os.getenv("API_KEY"):
-     raise ValueError("API_KEY is not set")
-
- if not os.getenv("API_URL"):
-     raise ValueError("API_URL is not set")
-
-
- @pytest.mark.asyncio
- async def test_async_batch_start_and_status():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_batch_scrape([
-         "https://docs.firecrawl.dev",
-         "https://firecrawl.dev",
-     ], formats=["markdown"], max_concurrency=1)
-     job_id = start.id
-
-     deadline = asyncio.get_event_loop().time() + 240
-     status = await client.get_batch_scrape_status(job_id)
-     while status.status not in ("completed", "failed", "cancelled") and asyncio.get_event_loop().time() < deadline:
-         await asyncio.sleep(2)
-         status = await client.get_batch_scrape_status(job_id)
-
-     assert status.status in ("completed", "failed", "cancelled")
-
-
- @pytest.mark.asyncio
- async def test_async_batch_wait_minimal():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     job = await client.batch_scrape([
-         "https://docs.firecrawl.dev",
-         "https://firecrawl.dev",
-     ], formats=["markdown"], poll_interval=1, timeout=120)
-     assert job.status in ("completed", "failed")
-
-
- @pytest.mark.asyncio
- async def test_async_batch_wait_with_all_params():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-     job = await client.batch_scrape(
-         [
-             "https://docs.firecrawl.dev",
-             "https://firecrawl.dev",
-         ],
-         formats=[
-             "markdown",
-             {"type": "json", "prompt": "Extract page title", "schema": json_schema},
-             {"type": "changeTracking", "prompt": "Track changes", "modes": ["json"]},
-         ],
-         only_main_content=True,
-         mobile=False,
-         ignore_invalid_urls=True,
-         max_concurrency=2,
-         zero_data_retention=False,
-         poll_interval=1,
-         timeout=180,
-     )
-     assert job.status in ("completed", "failed")
-
-
- @pytest.mark.asyncio
- async def test_async_cancel_batch():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_batch_scrape([
-         "https://docs.firecrawl.dev",
-         "https://firecrawl.dev",
-     ], formats=["markdown"], max_concurrency=1)
-     ok = await client.cancel_batch_scrape(start.id)
-     assert ok is True
-
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py DELETED
@@ -1,188 +0,0 @@
- import os
- import asyncio
- import pytest
- from dotenv import load_dotenv
- from firecrawl import AsyncFirecrawl
- from firecrawl.v2.types import ScrapeOptions
-
-
- load_dotenv()
-
- if not os.getenv("API_KEY"):
-     raise ValueError("API_KEY is not set")
-
- if not os.getenv("API_URL"):
-     raise ValueError("API_URL is not set")
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_start_and_status():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
-     job_id = start.id
-
-     deadline = asyncio.get_event_loop().time() + 180
-     status = await client.get_crawl_status(job_id)
-     while status.status not in ("completed", "failed") and asyncio.get_event_loop().time() < deadline:
-         await asyncio.sleep(2)
-         status = await client.get_crawl_status(job_id)
-
-     assert status.status in ("completed", "failed")
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_with_all_params():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     # rich scrape options including json format
-     json_schema = {
-         "type": "object",
-         "properties": {"title": {"type": "string"}},
-         "required": ["title"],
-     }
-     status = await client.crawl(
-         url="https://docs.firecrawl.dev",
-         prompt="Extract docs and blog",
-         include_paths=["/docs/*", "/blog/*"],
-         exclude_paths=["/admin/*"],
-         max_discovery_depth=2,
-         ignore_sitemap=False,
-         ignore_query_parameters=True,
-         limit=5,
-         crawl_entire_domain=False,
-         allow_external_links=True,
-         allow_subdomains=True,
-         delay=1,
-         max_concurrency=2,
-         webhook="https://example.com/hook",
-         scrape_options=ScrapeOptions(
-             formats=[
-                 "markdown",
-                 "rawHtml",
-                 {"type": "json", "prompt": "Extract title", "schema": json_schema},
-             ],
-             only_main_content=True,
-             mobile=False,
-             timeout=20000,
-             wait_for=500,
-             skip_tls_verification=False,
-             remove_base64_images=False,
-         ),
-         zero_data_retention=False,
-         poll_interval=2,
-         timeout=180,
-     )
-     assert status.status in ("completed", "failed")
-
-
- @pytest.mark.asyncio
- async def test_async_start_crawl_with_options():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=5, max_discovery_depth=2)
-     assert start.id is not None and start.url is not None
-
-
- @pytest.mark.asyncio
- async def test_async_start_crawl_with_prompt():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://firecrawl.dev", prompt="Extract all blog posts", limit=3)
-     assert start.id is not None and start.url is not None
-
-
- @pytest.mark.asyncio
- async def test_async_get_crawl_status_shape():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-     status = await client.get_crawl_status(start.id)
-     assert status.status in ("scraping", "completed", "failed")
-     assert status.completed >= 0
-     assert status.expires_at is not None
-     assert isinstance(status.data, list)
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_with_wait():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     job = await client.crawl(url="https://docs.firecrawl.dev", limit=3, max_discovery_depth=2, poll_interval=1, timeout=120)
-     assert job.status in ("completed", "failed")
-     assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_with_prompt_and_wait():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     job = await client.crawl(url="https://docs.firecrawl.dev", prompt="Extract all blog posts", limit=3, poll_interval=1, timeout=120)
-     assert job.status in ("completed", "failed")
-     assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_with_scrape_options():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     scrape_opts = ScrapeOptions(formats=["markdown", "links"], only_main_content=False, mobile=True)
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
-     assert start.id is not None
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_with_json_format_object():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-     scrape_opts = ScrapeOptions(formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}])
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
-     assert start.id is not None
-
-
- @pytest.mark.asyncio
- async def test_async_cancel_crawl():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-     cancelled = await client.cancel_crawl(start.id)
-     assert cancelled is True
-
-
- @pytest.mark.asyncio
- async def test_async_get_crawl_errors_and_invalid_job():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
-     errs = await client.get_crawl_errors(start.id)
-     assert hasattr(errs, "errors") and hasattr(errs, "robots_blocked")
-     with pytest.raises(Exception):
-         await client.get_crawl_errors("invalid-job-id-12345")
-
-
- @pytest.mark.asyncio
- async def test_async_active_crawls():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     resp = await client.active_crawls()
-     assert hasattr(resp, "success") and hasattr(resp, "crawls")
-
-
- @pytest.mark.asyncio
- async def test_async_active_crawls_with_running_crawl():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-     # fetch active crawls and assert our ID is listed
-     active = await client.active_crawls()
-     ids = [c.id for c in active.crawls]
-     assert start.id in ids
-     # cleanup
-     await client.cancel_crawl(start.id)
-
-
- @pytest.mark.asyncio
- async def test_async_crawl_params_preview():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     params = await client.crawl_params_preview(
-         url="https://docs.firecrawl.dev",
-         prompt="Extract all blog posts and documentation",
-     )
-     assert params is not None
-     # basic sanity: at least one field should be suggested
-     has_any = any([
-         getattr(params, "limit", None) is not None,
-         getattr(params, "include_paths", None) is not None,
-         getattr(params, "max_discovery_depth", None) is not None,
-     ])
-     assert has_any
-
-
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py DELETED
@@ -1,38 +0,0 @@
- import os
- import pytest
- from dotenv import load_dotenv
- from firecrawl import AsyncFirecrawl
-
-
- load_dotenv()
-
- if not os.getenv("API_KEY"):
-     raise ValueError("API_KEY is not set")
-
- if not os.getenv("API_URL"):
-     raise ValueError("API_URL is not set")
-
-
- @pytest.mark.asyncio
- async def test_async_extract_minimal():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     res = await client.extract(urls=["https://docs.firecrawl.dev"], prompt="Extract title")
-     assert res is not None
-
-
- @pytest.mark.asyncio
- async def test_async_extract_with_schema_and_options():
-     client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-     schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-     res = await client.extract(
-         urls=["https://docs.firecrawl.dev"],
-         prompt="Extract title",
-         schema=schema,
-         system_prompt="You are a helpful extractor",
-         allow_external_links=False,
-         enable_web_search=False,
-         show_sources=False,
-         # agent={"model": "FIRE-1", "prompt": "Extract title"}, # Skipping agent test in CI
-     )
-     assert res is not None
-