firecrawl 3.3.2__py3-none-any.whl → 3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of firecrawl has been flagged as potentially problematic.

firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "3.3.2"
+__version__ = "3.4.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/__tests__/e2e/v2/test_scrape.py CHANGED
@@ -151,4 +151,40 @@ class TestScrapeE2E:
             max_age=0,
             store_in_cache=False,
         )
-        assert isinstance(doc, Document)
+        assert isinstance(doc, Document)
+
+    def test_scrape_images_format(self):
+        """Test images format extraction."""
+        doc = self.client.scrape(
+            "https://firecrawl.dev",
+            formats=["images"]
+        )
+        assert isinstance(doc, Document)
+        assert doc.images is not None
+        assert isinstance(doc.images, list)
+        assert len(doc.images) > 0
+        # Should find firecrawl logo/branding images
+        assert any("firecrawl" in img.lower() or "logo" in img.lower() for img in doc.images)
+
+    def test_scrape_images_with_multiple_formats(self):
+        """Test images format works with other formats."""
+        doc = self.client.scrape(
+            "https://github.com",
+            formats=["markdown", "links", "images"]
+        )
+        assert isinstance(doc, Document)
+        assert doc.markdown is not None
+        assert doc.links is not None
+        assert doc.images is not None
+        assert isinstance(doc.images, list)
+        assert len(doc.images) > 0
+
+        # Images should find content not available in links format
+        image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico']
+        link_images = [
+            link for link in (doc.links or [])
+            if any(ext in link.lower() for ext in image_extensions)
+        ]
+
+        # Should discover additional images beyond those with obvious extensions
+        assert len(doc.images) >= len(link_images)
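
For reference, the new tests above exercise the `images` format end to end through the v2 client. Below is a minimal standalone sketch of the same call; the API key handling and the `FIRECRAWL_API_KEY` variable name are assumptions for illustration, not part of this diff.

```python
import os

from firecrawl import Firecrawl  # top-level client (class defined in firecrawl/client.py)

# Assumption: an API key is available in the environment; the e2e tests wire
# their credentials up through conftest.py instead.
client = Firecrawl(api_key=os.environ["FIRECRAWL_API_KEY"])

# Request the new "images" format alongside markdown, mirroring the tests above.
doc = client.scrape("https://firecrawl.dev", formats=["markdown", "images"])

print(doc.images)  # Optional[List[str]] of image URLs found on the page
```
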
firecrawl/client.py CHANGED
@@ -56,7 +56,6 @@ class V2Proxy:
         self._client = client_instance
 
         if client_instance:
-            # self.scrape = client_instance.scrape
             self.search = client_instance.search
             self.crawl = client_instance.crawl
             self.get_crawl_status = client_instance.get_crawl_status
@@ -168,14 +167,17 @@ class Firecrawl:
         self.v1 = V1Proxy(self._v1_client) if self._v1_client else None
         self.v2 = V2Proxy(self._v2_client)
 
-
         self.scrape = self._v2_client.scrape
+        self.search = self._v2_client.search
+        self.map = self._v2_client.map
+
         self.crawl = self._v2_client.crawl
         self.start_crawl = self._v2_client.start_crawl
         self.crawl_params_preview = self._v2_client.crawl_params_preview
         self.get_crawl_status = self._v2_client.get_crawl_status
         self.cancel_crawl = self._v2_client.cancel_crawl
         self.get_crawl_errors = self._v2_client.get_crawl_errors
+        self.get_active_crawls = self._v2_client.get_active_crawls
         self.active_crawls = self._v2_client.active_crawls
 
         self.start_batch_scrape = self._v2_client.start_batch_scrape
@@ -183,13 +185,15 @@
         self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
         self.batch_scrape = self._v2_client.batch_scrape
         self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors
+
+        self.start_extract = self._v2_client.start_extract
         self.get_extract_status = self._v2_client.get_extract_status
-        self.map = self._v2_client.map
-        self.search = self._v2_client.search
         self.extract = self._v2_client.extract
+
         self.get_concurrency = self._v2_client.get_concurrency
         self.get_credit_usage = self._v2_client.get_credit_usage
         self.get_token_usage = self._v2_client.get_token_usage
+
         self.watcher = self._v2_client.watcher
 
 class AsyncFirecrawl:
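
The rebinding above surfaces `search`, `map`, `start_extract`, and the new `get_active_crawls` directly on the `Firecrawl` facade instead of only through the v2 proxy. A rough sketch of what that enables is below; the argument shapes are assumptions and are not taken from this diff.

```python
from firecrawl import Firecrawl

client = Firecrawl(api_key="fc-...")  # placeholder key

# These attributes are plain aliases of the underlying v2 client's methods,
# so calling them is equivalent to going through client.v2.
results = client.search("firecrawl python sdk")   # signature assumed
site_map = client.map("https://firecrawl.dev")    # signature assumed
active = client.get_active_crawls()               # newly aliased in 3.4.0
```
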
firecrawl/v2/types.py CHANGED
@@ -114,6 +114,12 @@ class DocumentMetadata(BaseModel):
     def coerce_status_code_to_int(cls, v):
         return cls._coerce_string_to_int(v)
 
+class AttributeResult(BaseModel):
+    """Result of attribute extraction."""
+    selector: str
+    attribute: str
+    values: List[str]
+
 class Document(BaseModel):
     """A scraped document."""
     markdown: Optional[str] = None
@@ -123,6 +129,7 @@ class Document(BaseModel):
     summary: Optional[str] = None
     metadata: Optional[DocumentMetadata] = None
     links: Optional[List[str]] = None
+    images: Optional[List[str]] = None
     screenshot: Optional[str] = None
     actions: Optional[Dict[str, Any]] = None
     warning: Optional[str] = None
@@ -182,7 +189,7 @@ CategoryOption = Union[str, Category]
 
 FormatString = Literal[
     # camelCase versions (API format)
-    "markdown", "html", "rawHtml", "links", "screenshot", "summary", "changeTracking", "json",
+    "markdown", "html", "rawHtml", "links", "images", "screenshot", "summary", "changeTracking", "json", "attributes",
     # snake_case versions (user-friendly)
     "raw_html", "change_tracking"
 ]
@@ -214,9 +221,18 @@ class ScreenshotFormat(BaseModel):
     full_page: Optional[bool] = None
     quality: Optional[int] = None
     viewport: Optional[Union[Dict[str, int], Viewport]] = None
+
+class AttributeSelector(BaseModel):
+    """Selector and attribute pair for attribute extraction."""
+    selector: str
+    attribute: str
 
-FormatOption = Union[Dict[str, Any], FormatString, JsonFormat, ChangeTrackingFormat, ScreenshotFormat, Format]
+class AttributesFormat(Format):
+    """Configuration for attribute extraction."""
+    type: Literal["attributes"] = "attributes"
+    selectors: List[AttributeSelector]
 
+FormatOption = Union[Dict[str, Any], FormatString, JsonFormat, ChangeTrackingFormat, ScreenshotFormat, AttributesFormat, Format]
 # Scrape types
 class ScrapeFormats(BaseModel):
     """Output formats for scraping."""
@@ -226,6 +242,7 @@ class ScrapeFormats(BaseModel):
     raw_html: bool = False
     summary: bool = False
     links: bool = False
+    images: bool = False
     screenshot: bool = False
     change_tracking: bool = False
     json: bool = False
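
The new `attributes` format is driven by the `AttributeSelector` and `AttributesFormat` models added above. A minimal sketch of constructing them follows; the selectors are illustrative, and whether a given scrape endpoint honors the format is not shown by this diff.

```python
from firecrawl.v2.types import AttributeSelector, AttributesFormat, ScrapeFormats

# One selector/attribute pair per set of values to extract.
attrs = AttributesFormat(
    selectors=[
        AttributeSelector(selector="a.docs-link", attribute="href"),
        AttributeSelector(selector="img", attribute="alt"),
    ]
)

# FormatOption now accepts AttributesFormat, so it can sit next to plain format strings:
formats = ["markdown", "images", attrs]

# ScrapeFormats also gained a boolean toggle for the new images format:
flags = ScrapeFormats(images=True)
```
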
firecrawl-3.3.2.dist-info/METADATA → firecrawl-3.4.0.dist-info/METADATA
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: firecrawl
-Version: 3.3.2
+Version: 3.4.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
@@ -38,8 +38,12 @@ Requires-Dist: httpx
 Requires-Dist: python-dotenv
 Requires-Dist: websockets
 Requires-Dist: nest-asyncio
-Requires-Dist: pydantic (>=2.0)
+Requires-Dist: pydantic>=2.0
 Requires-Dist: aiohttp
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
 
 # Firecrawl Python SDK
 
firecrawl-3.3.2.dist-info/RECORD → firecrawl-3.4.0.dist-info/RECORD
@@ -1,5 +1,5 @@
-firecrawl/__init__.py,sha256=l_9wjt7rVPqg4ovtOFC46I_8LghOX_jDFoEXuSavJJ0,2192
-firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
+firecrawl/__init__.py,sha256=Czdc4XJmhkDKcbSuq1wZsiWfezM8v9AUz0K_DEfOH-A,2192
+firecrawl/client.py,sha256=tp3mUo_3aGPuZ53kpU4bhM-5EtwD_IUWrJ7wm0GMuCc,11159
 firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
 firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
 firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
@@ -8,7 +8,7 @@ firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=H9GtuwHIFdOQ958SOVThi_kvD
 firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=cOssZvIwtghAtLiM1QdNLhPEwAxZ9j9umTrBUPtJjpU,9951
 firecrawl/__tests__/e2e/v2/test_extract.py,sha256=HgvGiDlyWtFygiPo5EP44Dem1oWrwgRF-hfc1LfeVSU,1670
 firecrawl/__tests__/e2e/v2/test_map.py,sha256=9sT-Yq8V_8c9esl_bv5hnTA9WXb2Dg81kj6M-s0484c,1618
-firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=psW2nfcA_hMFpZ4msL_VJWJTMa3Sidp11ubhftbm52g,5759
+firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=oyroF_WaEdxgD8t_SHkLBBfDRv1_6xZ_7vSTQpwlmA8,7198
 firecrawl/__tests__/e2e/v2/test_search.py,sha256=tvU9_eg_3H5em0fhIwPPjuYe9BRAQ5St-BLM0l_FfVs,9079
 firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
 firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
@@ -45,7 +45,7 @@ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
 firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
 firecrawl/v2/client.py,sha256=_DZFZO1aWvODzznK0g2Svcd2-xxXgWGR0d9vniNlk1w,30621
 firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
-firecrawl/v2/types.py,sha256=rBdTaTQmb1SmdR8O0GvA_gCfBG-QCtBOrMsFgA9Usms,22114
+firecrawl/v2/types.py,sha256=F-RCADQFdpAmF5t8LUabLOgyIV02Ol34yNa9y3S3ZMg,22667
 firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
 firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
 firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
@@ -70,10 +70,10 @@ firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg
 firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
 firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
 firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
+firecrawl-3.4.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
 tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
-firecrawl-3.3.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
-firecrawl-3.3.2.dist-info/METADATA,sha256=rARO6Cg0dIX46x3MEerpa6ymhnviGgrJDe7mGyZV1Jw,7313
-firecrawl-3.3.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-firecrawl-3.3.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
-firecrawl-3.3.2.dist-info/RECORD,,
+firecrawl-3.4.0.dist-info/METADATA,sha256=Gyv7wpufayVyAsLKfllRJihHmw43p5UH_lFDJf6i-mU,7392
+firecrawl-3.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+firecrawl-3.4.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl-3.4.0.dist-info/RECORD,,
firecrawl-3.3.2.dist-info/WHEEL → firecrawl-3.4.0.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.38.4)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 