firecrawl 1.2.4__tar.gz → 1.3.1__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.2.4
3
+ Version: 1.3.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -76,7 +76,6 @@ crawl_status = app.crawl_url(
76
76
  'limit': 100,
77
77
  'scrapeOptions': {'formats': ['markdown', 'html']}
78
78
  },
79
- wait_until_done=True,
80
79
  poll_interval=30
81
80
  )
82
81
  print(crawl_status)
@@ -36,7 +36,6 @@ crawl_status = app.crawl_url(
36
36
  'limit': 100,
37
37
  'scrapeOptions': {'formats': ['markdown', 'html']}
38
38
  },
39
- wait_until_done=True,
40
39
  poll_interval=30
41
40
  )
42
41
  print(crawl_status)
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp
15
15
 
16
- __version__ = "1.2.4"
16
+ __version__ = "1.3.1"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -117,7 +117,14 @@ class FirecrawlApp:
117
117
  idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
118
118
 
119
119
  Returns:
120
- Any: The crawl job ID or the crawl results if waiting until completion.
120
+ Dict[str, Any]: A dictionary containing the crawl results. The structure includes:
121
+ - 'success' (bool): Indicates if the crawl was successful.
122
+ - 'status' (str): The final status of the crawl job (e.g., 'completed').
123
+ - 'completed' (int): Number of scraped pages that completed.
124
+ - 'total' (int): Total number of scraped pages.
125
+ - 'creditsUsed' (int): Estimated number of API credits used for this crawl.
126
+ - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the crawl data expires.
127
+ - 'data' (List[Dict]): List of all the scraped pages.
121
128
 
122
129
  Raises:
123
130
  Exception: If the crawl job initiation or monitoring fails.
@@ -146,7 +153,10 @@ class FirecrawlApp:
146
153
  idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
147
154
 
148
155
  Returns:
149
- Dict[str, Any]: The response from the crawl initiation request.
156
+ Dict[str, Any]: A dictionary containing the crawl initiation response. The structure includes:
157
+ - 'success' (bool): Indicates if the crawl initiation was successful.
158
+ - 'id' (str): The unique identifier for the crawl job.
159
+ - 'url' (str): The URL to check the status of the crawl job.
150
160
  """
151
161
  endpoint = f'/v1/crawl'
152
162
  headers = self._prepare_headers(idempotency_key)
@@ -191,6 +201,23 @@ class FirecrawlApp:
191
201
  }
192
202
  else:
193
203
  self._handle_error(response, 'check crawl status')
204
+
205
+ def cancel_crawl(self, id: str) -> Dict[str, Any]:
206
+ """
207
+ Cancel an asynchronous crawl job using the Firecrawl API.
208
+
209
+ Args:
210
+ id (str): The ID of the crawl job to cancel.
211
+
212
+ Returns:
213
+ Dict[str, Any]: The response from the cancel crawl request.
214
+ """
215
+ headers = self._prepare_headers()
216
+ response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
217
+ if response.status_code == 200:
218
+ return response.json()
219
+ else:
220
+ self._handle_error(response, "cancel crawl job")
194
221
 
195
222
  def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
196
223
  """
@@ -219,7 +246,7 @@ class FirecrawlApp:
219
246
  params (Optional[Dict[str, Any]]): Additional parameters for the map search.
220
247
 
221
248
  Returns:
222
- Any: The result of the map search, typically a dictionary containing mapping data.
249
+ List[str]: A list of URLs discovered during the map search.
223
250
  """
224
251
  endpoint = f'/v1/map'
225
252
  headers = self._prepare_headers()
@@ -228,7 +255,7 @@ class FirecrawlApp:
228
255
  json_data = {'url': url}
229
256
  if params:
230
257
  json_data.update(params)
231
-
258
+
232
259
  # Make the POST request with the prepared headers and JSON data
233
260
  response = requests.post(
234
261
  f'{self.api_url}{endpoint}',
@@ -238,7 +265,7 @@ class FirecrawlApp:
238
265
  if response.status_code == 200:
239
266
  response = response.json()
240
267
  if response['success'] and 'links' in response:
241
- return response['links']
268
+ return response
242
269
  else:
243
270
  raise Exception(f'Failed to map URL. Error: {response["error"]}')
244
271
  else:
@@ -321,6 +348,33 @@ class FirecrawlApp:
321
348
  else:
322
349
  return response
323
350
  return response
351
+
352
+ def _delete_request(self, url: str,
353
+ headers: Dict[str, str],
354
+ retries: int = 3,
355
+ backoff_factor: float = 0.5) -> requests.Response:
356
+ """
357
+ Make a DELETE request with retries.
358
+
359
+ Args:
360
+ url (str): The URL to send the DELETE request to.
361
+ headers (Dict[str, str]): The headers to include in the DELETE request.
362
+ retries (int): Number of retries for the request.
363
+ backoff_factor (float): Backoff factor for retries.
364
+
365
+ Returns:
366
+ requests.Response: The response from the DELETE request.
367
+
368
+ Raises:
369
+ requests.RequestException: If the request fails after the specified retries.
370
+ """
371
+ for attempt in range(retries):
372
+ response = requests.delete(url, headers=headers)
373
+ if response.status_code == 502:
374
+ time.sleep(backoff_factor * (2 ** attempt))
375
+ else:
376
+ return response
377
+ return response
324
378
 
325
379
  def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
326
380
  """
@@ -434,4 +488,4 @@ class CrawlWatcher:
434
488
  self.dispatch_event('document', doc)
435
489
  elif msg['type'] == 'document':
436
490
  self.data.append(msg['data'])
437
- self.dispatch_event('document', msg['data'])
491
+ self.dispatch_event('document', msg['data'])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.2.4
3
+ Version: 1.3.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -76,7 +76,6 @@ crawl_status = app.crawl_url(
76
76
  'limit': 100,
77
77
  'scrapeOptions': {'formats': ['markdown', 'html']}
78
78
  },
79
- wait_until_done=True,
80
79
  poll_interval=30
81
80
  )
82
81
  print(crawl_status)
File without changes
File without changes
File without changes
File without changes