firecrawl 1.2.3__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-1.2.3 → firecrawl-1.3.0}/PKG-INFO +1 -2
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/__init__.py +1 -1
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/firecrawl.py +47 -4
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl.egg-info/PKG-INFO +1 -2
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl.egg-info/requires.txt +0 -1
- {firecrawl-1.2.3 → firecrawl-1.3.0}/pyproject.toml +1 -2
- {firecrawl-1.2.3 → firecrawl-1.3.0}/LICENSE +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/README.md +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/setup.cfg +0 -0
- {firecrawl-1.2.3 → firecrawl-1.3.0}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -36,7 +36,6 @@ License-File: LICENSE
|
|
|
36
36
|
Requires-Dist: requests
|
|
37
37
|
Requires-Dist: python-dotenv
|
|
38
38
|
Requires-Dist: websockets
|
|
39
|
-
Requires-Dist: asyncio
|
|
40
39
|
Requires-Dist: nest-asyncio
|
|
41
40
|
|
|
42
41
|
# Firecrawl Python SDK
|
|
@@ -13,7 +13,6 @@ import logging
|
|
|
13
13
|
import os
|
|
14
14
|
import time
|
|
15
15
|
from typing import Any, Dict, Optional, List
|
|
16
|
-
import asyncio
|
|
17
16
|
import json
|
|
18
17
|
|
|
19
18
|
import requests
|
|
@@ -192,6 +191,23 @@ class FirecrawlApp:
|
|
|
192
191
|
}
|
|
193
192
|
else:
|
|
194
193
|
self._handle_error(response, 'check crawl status')
|
|
194
|
+
|
|
195
|
+
def cancel_crawl(self, id: str) -> Dict[str, Any]:
|
|
196
|
+
"""
|
|
197
|
+
Cancel an asynchronous crawl job using the Firecrawl API.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
id (str): The ID of the crawl job to cancel.
|
|
201
|
+
|
|
202
|
+
Returns:
|
|
203
|
+
Dict[str, Any]: The response from the cancel crawl request.
|
|
204
|
+
"""
|
|
205
|
+
headers = self._prepare_headers()
|
|
206
|
+
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
|
|
207
|
+
if response.status_code == 200:
|
|
208
|
+
return response.json()
|
|
209
|
+
else:
|
|
210
|
+
self._handle_error(response, "cancel crawl job")
|
|
195
211
|
|
|
196
212
|
def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
|
|
197
213
|
"""
|
|
@@ -229,7 +245,7 @@ class FirecrawlApp:
|
|
|
229
245
|
json_data = {'url': url}
|
|
230
246
|
if params:
|
|
231
247
|
json_data.update(params)
|
|
232
|
-
|
|
248
|
+
|
|
233
249
|
# Make the POST request with the prepared headers and JSON data
|
|
234
250
|
response = requests.post(
|
|
235
251
|
f'{self.api_url}{endpoint}',
|
|
@@ -239,7 +255,7 @@ class FirecrawlApp:
|
|
|
239
255
|
if response.status_code == 200:
|
|
240
256
|
response = response.json()
|
|
241
257
|
if response['success'] and 'links' in response:
|
|
242
|
-
return response
|
|
258
|
+
return response
|
|
243
259
|
else:
|
|
244
260
|
raise Exception(f'Failed to map URL. Error: {response["error"]}')
|
|
245
261
|
else:
|
|
@@ -322,6 +338,33 @@ class FirecrawlApp:
|
|
|
322
338
|
else:
|
|
323
339
|
return response
|
|
324
340
|
return response
|
|
341
|
+
|
|
342
|
+
def _delete_request(self, url: str,
|
|
343
|
+
headers: Dict[str, str],
|
|
344
|
+
retries: int = 3,
|
|
345
|
+
backoff_factor: float = 0.5) -> requests.Response:
|
|
346
|
+
"""
|
|
347
|
+
Make a DELETE request with retries.
|
|
348
|
+
|
|
349
|
+
Args:
|
|
350
|
+
url (str): The URL to send the DELETE request to.
|
|
351
|
+
headers (Dict[str, str]): The headers to include in the DELETE request.
|
|
352
|
+
retries (int): Number of retries for the request.
|
|
353
|
+
backoff_factor (float): Backoff factor for retries.
|
|
354
|
+
|
|
355
|
+
Returns:
|
|
356
|
+
requests.Response: The response from the DELETE request.
|
|
357
|
+
|
|
358
|
+
Raises:
|
|
359
|
+
requests.RequestException: If the request fails after the specified retries.
|
|
360
|
+
"""
|
|
361
|
+
for attempt in range(retries):
|
|
362
|
+
response = requests.delete(url, headers=headers)
|
|
363
|
+
if response.status_code == 502:
|
|
364
|
+
time.sleep(backoff_factor * (2 ** attempt))
|
|
365
|
+
else:
|
|
366
|
+
return response
|
|
367
|
+
return response
|
|
325
368
|
|
|
326
369
|
def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
|
|
327
370
|
"""
|
|
@@ -435,4 +478,4 @@ class CrawlWatcher:
|
|
|
435
478
|
self.dispatch_event('document', doc)
|
|
436
479
|
elif msg['type'] == 'document':
|
|
437
480
|
self.data.append(msg['data'])
|
|
438
|
-
self.dispatch_event('document', msg['data'])
|
|
481
|
+
self.dispatch_event('document', msg['data'])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -36,7 +36,6 @@ License-File: LICENSE
|
|
|
36
36
|
Requires-Dist: requests
|
|
37
37
|
Requires-Dist: python-dotenv
|
|
38
38
|
Requires-Dist: websockets
|
|
39
|
-
Requires-Dist: asyncio
|
|
40
39
|
Requires-Dist: nest-asyncio
|
|
41
40
|
|
|
42
41
|
# Firecrawl Python SDK
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|