firecrawl 1.2.4__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +60 -6
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.1.dist-info}/METADATA +1 -2
- firecrawl-1.3.1.dist-info/RECORD +11 -0
- firecrawl-1.2.4.dist-info/RECORD +0 -11
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.1.dist-info}/LICENSE +0 -0
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.1.dist-info}/WHEEL +0 -0
- {firecrawl-1.2.4.dist-info → firecrawl-1.3.1.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
firecrawl/firecrawl.py
CHANGED
|
@@ -117,7 +117,14 @@ class FirecrawlApp:
|
|
|
117
117
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
|
118
118
|
|
|
119
119
|
Returns:
|
|
120
|
-
Any:
|
|
120
|
+
Dict[str, Any]: A dictionary containing the crawl results. The structure includes:
|
|
121
|
+
- 'success' (bool): Indicates if the crawl was successful.
|
|
122
|
+
- 'status' (str): The final status of the crawl job (e.g., 'completed').
|
|
123
|
+
- 'completed' (int): Number of scraped pages that completed.
|
|
124
|
+
- 'total' (int): Total number of scraped pages.
|
|
125
|
+
- 'creditsUsed' (int): Estimated number of API credits used for this crawl.
|
|
126
|
+
- 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the crawl data expires.
|
|
127
|
+
- 'data' (List[Dict]): List of all the scraped pages.
|
|
121
128
|
|
|
122
129
|
Raises:
|
|
123
130
|
Exception: If the crawl job initiation or monitoring fails.
|
|
@@ -146,7 +153,10 @@ class FirecrawlApp:
|
|
|
146
153
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
|
147
154
|
|
|
148
155
|
Returns:
|
|
149
|
-
Dict[str, Any]:
|
|
156
|
+
Dict[str, Any]: A dictionary containing the crawl initiation response. The structure includes:
|
|
157
|
+
- 'success' (bool): Indicates if the crawl initiation was successful.
|
|
158
|
+
- 'id' (str): The unique identifier for the crawl job.
|
|
159
|
+
- 'url' (str): The URL to check the status of the crawl job.
|
|
150
160
|
"""
|
|
151
161
|
endpoint = f'/v1/crawl'
|
|
152
162
|
headers = self._prepare_headers(idempotency_key)
|
|
@@ -191,6 +201,23 @@ class FirecrawlApp:
|
|
|
191
201
|
}
|
|
192
202
|
else:
|
|
193
203
|
self._handle_error(response, 'check crawl status')
|
|
204
|
+
|
|
205
|
+
def cancel_crawl(self, id: str) -> Dict[str, Any]:
|
|
206
|
+
"""
|
|
207
|
+
Cancel an asynchronous crawl job using the Firecrawl API.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
id (str): The ID of the crawl job to cancel.
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
Dict[str, Any]: The response from the cancel crawl request.
|
|
214
|
+
"""
|
|
215
|
+
headers = self._prepare_headers()
|
|
216
|
+
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
|
|
217
|
+
if response.status_code == 200:
|
|
218
|
+
return response.json()
|
|
219
|
+
else:
|
|
220
|
+
self._handle_error(response, "cancel crawl job")
|
|
194
221
|
|
|
195
222
|
def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
|
|
196
223
|
"""
|
|
@@ -219,7 +246,7 @@ class FirecrawlApp:
|
|
|
219
246
|
params (Optional[Dict[str, Any]]): Additional parameters for the map search.
|
|
220
247
|
|
|
221
248
|
Returns:
|
|
222
|
-
|
|
249
|
+
List[str]: A list of URLs discovered during the map search.
|
|
223
250
|
"""
|
|
224
251
|
endpoint = f'/v1/map'
|
|
225
252
|
headers = self._prepare_headers()
|
|
@@ -228,7 +255,7 @@ class FirecrawlApp:
|
|
|
228
255
|
json_data = {'url': url}
|
|
229
256
|
if params:
|
|
230
257
|
json_data.update(params)
|
|
231
|
-
|
|
258
|
+
|
|
232
259
|
# Make the POST request with the prepared headers and JSON data
|
|
233
260
|
response = requests.post(
|
|
234
261
|
f'{self.api_url}{endpoint}',
|
|
@@ -238,7 +265,7 @@ class FirecrawlApp:
|
|
|
238
265
|
if response.status_code == 200:
|
|
239
266
|
response = response.json()
|
|
240
267
|
if response['success'] and 'links' in response:
|
|
241
|
-
return response
|
|
268
|
+
return response
|
|
242
269
|
else:
|
|
243
270
|
raise Exception(f'Failed to map URL. Error: {response["error"]}')
|
|
244
271
|
else:
|
|
@@ -321,6 +348,33 @@ class FirecrawlApp:
|
|
|
321
348
|
else:
|
|
322
349
|
return response
|
|
323
350
|
return response
|
|
351
|
+
|
|
352
|
+
def _delete_request(self, url: str,
|
|
353
|
+
headers: Dict[str, str],
|
|
354
|
+
retries: int = 3,
|
|
355
|
+
backoff_factor: float = 0.5) -> requests.Response:
|
|
356
|
+
"""
|
|
357
|
+
Make a DELETE request with retries.
|
|
358
|
+
|
|
359
|
+
Args:
|
|
360
|
+
url (str): The URL to send the DELETE request to.
|
|
361
|
+
headers (Dict[str, str]): The headers to include in the DELETE request.
|
|
362
|
+
retries (int): Number of retries for the request.
|
|
363
|
+
backoff_factor (float): Backoff factor for retries.
|
|
364
|
+
|
|
365
|
+
Returns:
|
|
366
|
+
requests.Response: The response from the DELETE request.
|
|
367
|
+
|
|
368
|
+
Raises:
|
|
369
|
+
requests.RequestException: If the request fails after the specified retries.
|
|
370
|
+
"""
|
|
371
|
+
for attempt in range(retries):
|
|
372
|
+
response = requests.delete(url, headers=headers)
|
|
373
|
+
if response.status_code == 502:
|
|
374
|
+
time.sleep(backoff_factor * (2 ** attempt))
|
|
375
|
+
else:
|
|
376
|
+
return response
|
|
377
|
+
return response
|
|
324
378
|
|
|
325
379
|
def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
|
|
326
380
|
"""
|
|
@@ -434,4 +488,4 @@ class CrawlWatcher:
|
|
|
434
488
|
self.dispatch_event('document', doc)
|
|
435
489
|
elif msg['type'] == 'document':
|
|
436
490
|
self.data.append(msg['data'])
|
|
437
|
-
self.dispatch_event('document', msg['data'])
|
|
491
|
+
self.dispatch_event('document', msg['data'])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -76,7 +76,6 @@ crawl_status = app.crawl_url(
|
|
|
76
76
|
'limit': 100,
|
|
77
77
|
'scrapeOptions': {'formats': ['markdown', 'html']}
|
|
78
78
|
},
|
|
79
|
-
wait_until_done=True,
|
|
80
79
|
poll_interval=30
|
|
81
80
|
)
|
|
82
81
|
print(crawl_status)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
firecrawl/__init__.py,sha256=ywBQO_FgxUjvRrCUWiuOTlASMKIQmpB1e7uRPBnaw-U,1682
|
|
2
|
+
firecrawl/firecrawl.py,sha256=V7V3kmzCFCoqjomD_I9yPD0h8uq8GkUsi7-x6NGrw0A,20008
|
|
3
|
+
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
+
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
+
firecrawl-1.3.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
+
firecrawl-1.3.1.dist-info/METADATA,sha256=eHpv5fJmiPN--bfaSHcPsmIQKNrUcHf0F8HcsvAWtfE,8249
|
|
9
|
+
firecrawl-1.3.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
+
firecrawl-1.3.1.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
+
firecrawl-1.3.1.dist-info/RECORD,,
|
firecrawl-1.2.4.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=HmserZa0PACOszHJE0mNB554NdrBRZyauK5qclFZ7Cc,1682
|
|
2
|
-
firecrawl/firecrawl.py,sha256=TkSvL9421CHKuPLdyV1C0i1dNRtubT6qncF9gRIE-W8,17568
|
|
3
|
-
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
-
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
-
firecrawl-1.2.4.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
-
firecrawl-1.2.4.dist-info/METADATA,sha256=x8OTnwxKW7UMqs6F6GJ-zZ8XKjJS7VhFMqY0u-YSs9Y,8274
|
|
9
|
-
firecrawl-1.2.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
-
firecrawl-1.2.4.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
-
firecrawl-1.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|