firecrawl 1.2.4__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +47 -3
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.0.dist-info}/METADATA +1 -1
- firecrawl-1.3.0.dist-info/RECORD +11 -0
- firecrawl-1.2.4.dist-info/RECORD +0 -11
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.0.dist-info}/LICENSE +0 -0
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.0.dist-info}/WHEEL +0 -0
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.0.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
firecrawl/firecrawl.py
CHANGED
|
@@ -191,6 +191,23 @@ class FirecrawlApp:
|
|
|
191
191
|
}
|
|
192
192
|
else:
|
|
193
193
|
self._handle_error(response, 'check crawl status')
|
|
194
|
+
|
|
195
|
+
def cancel_crawl(self, id: str) -> Dict[str, Any]:
    """
    Ask the Firecrawl API to cancel an asynchronous crawl job.

    Issues a DELETE request against ``{api_url}/v1/crawl/{id}``. When the
    API answers with HTTP 200 the decoded JSON body is returned; any other
    status code is handed to ``self._handle_error``.

    Args:
        id (str): The ID of the crawl job to cancel.

    Returns:
        Dict[str, Any]: The parsed JSON body of the cancel response.
    """
    request_headers = self._prepare_headers()
    cancel_url = f'{self.api_url}/v1/crawl/{id}'
    reply = self._delete_request(cancel_url, request_headers)

    # Success path first; every non-200 status is delegated to the shared
    # error handler.
    if reply.status_code == 200:
        return reply.json()
    self._handle_error(reply, "cancel crawl job")
|
|
194
211
|
|
|
195
212
|
def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
|
|
196
213
|
"""
|
|
@@ -228,7 +245,7 @@ class FirecrawlApp:
|
|
|
228
245
|
json_data = {'url': url}
|
|
229
246
|
if params:
|
|
230
247
|
json_data.update(params)
|
|
231
|
-
|
|
248
|
+
|
|
232
249
|
# Make the POST request with the prepared headers and JSON data
|
|
233
250
|
response = requests.post(
|
|
234
251
|
f'{self.api_url}{endpoint}',
|
|
@@ -238,7 +255,7 @@ class FirecrawlApp:
|
|
|
238
255
|
if response.status_code == 200:
|
|
239
256
|
response = response.json()
|
|
240
257
|
if response['success'] and 'links' in response:
|
|
241
|
-
return response
|
|
258
|
+
return response
|
|
242
259
|
else:
|
|
243
260
|
raise Exception(f'Failed to map URL. Error: {response["error"]}')
|
|
244
261
|
else:
|
|
@@ -321,6 +338,33 @@ class FirecrawlApp:
|
|
|
321
338
|
else:
|
|
322
339
|
return response
|
|
323
340
|
return response
|
|
341
|
+
|
|
342
|
+
def _delete_request(self, url: str,
                    headers: Dict[str, str],
                    retries: int = 3,
                    backoff_factor: float = 0.5) -> requests.Response:
    """
    Make a DELETE request, retrying while the server answers HTTP 502.

    Args:
        url (str): The URL to send the DELETE request to.
        headers (Dict[str, str]): The headers to include in the DELETE request.
        retries (int): Maximum number of attempts. Values below 1 are
            treated as 1 so that a response is always produced.
        backoff_factor (float): Base delay in seconds; the wait before the
            next attempt is ``backoff_factor * 2 ** attempt``.

    Returns:
        requests.Response: The first response whose status is not 502, or
        the last 502 response once every attempt has been used.

    Raises:
        requests.RequestException: Not caught or retried here; anything
            raised by ``requests.delete`` propagates to the caller.
    """
    # Guarantee at least one attempt: with retries <= 0 the loop body would
    # never run and the final ``return response`` would hit an unbound name.
    attempts = max(retries, 1)
    for attempt in range(attempts):
        response = requests.delete(url, headers=headers)
        if response.status_code != 502:
            return response
        # Back off only when another attempt follows; sleeping after the
        # last attempt would just delay returning the failure.
        if attempt < attempts - 1:
            time.sleep(backoff_factor * (2 ** attempt))
    return response
|
|
324
368
|
|
|
325
369
|
def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
|
|
326
370
|
"""
|
|
@@ -434,4 +478,4 @@ class CrawlWatcher:
|
|
|
434
478
|
self.dispatch_event('document', doc)
|
|
435
479
|
elif msg['type'] == 'document':
|
|
436
480
|
self.data.append(msg['data'])
|
|
437
|
-
self.dispatch_event('document', msg['data'])
|
|
481
|
+
self.dispatch_event('document', msg['data'])
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
firecrawl/__init__.py,sha256=PLBoKqU5OAidzN7dJSD96fjhO6dzeJr0M13p2A9-6co,1682
|
|
2
|
+
firecrawl/firecrawl.py,sha256=aCJ-UMOnt35AiOVAe9kobUFjaS-K81mMKe49TZAKB-4,19175
|
|
3
|
+
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
+
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
+
firecrawl-1.3.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
+
firecrawl-1.3.0.dist-info/METADATA,sha256=EGLvZGl6JyT3iBgx9YEBRr1NCiaiL859YYGlxc6JppU,8274
|
|
9
|
+
firecrawl-1.3.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
+
firecrawl-1.3.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
+
firecrawl-1.3.0.dist-info/RECORD,,
|
firecrawl-1.2.4.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=HmserZa0PACOszHJE0mNB554NdrBRZyauK5qclFZ7Cc,1682
|
|
2
|
-
firecrawl/firecrawl.py,sha256=TkSvL9421CHKuPLdyV1C0i1dNRtubT6qncF9gRIE-W8,17568
|
|
3
|
-
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
-
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
-
firecrawl-1.2.4.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
-
firecrawl-1.2.4.dist-info/METADATA,sha256=x8OTnwxKW7UMqs6F6GJ-zZ8XKjJS7VhFMqY0u-YSs9Y,8274
|
|
9
|
-
firecrawl-1.2.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
-
firecrawl-1.2.4.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
-
firecrawl-1.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|