firecrawl-py 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.1
2
+ Name: firecrawl-py
3
+ Version: 0.0.1
4
+ Summary: Python SDK for Firecrawl API
5
+ Home-page: https://github.com/mendableai/firecrawl-py
6
+ Author: Eric Ciarla
7
+ Author-email: nick@mendable.ai
8
+ Requires-Dist: requests
9
+
@@ -0,0 +1,6 @@
1
+ mendable/__init__.py,sha256=Z0tfQTnh-Rr7V-_3yjYlgxt3cREhhqV8s1LVXSmNCuo,31
2
+ mendable/firecrawl.py,sha256=ReCvqTWgxHEtgYakT8nOkWYv1KsEiLzkSSQ9-3HyZc8,3362
3
+ firecrawl_py-0.0.1.dist-info/METADATA,sha256=qGKynQcGLplfRP6XN5IUweLZp9E4N1M7cTx0edBef38,224
4
+ firecrawl_py-0.0.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
+ firecrawl_py-0.0.1.dist-info/top_level.txt,sha256=HV-vVOHTsAD2b_EZNXwuUyiCCD4Ex74C2SPkTf1h2eA,9
6
+ firecrawl_py-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.38.4)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ mendable
mendable/__init__.py ADDED
@@ -0,0 +1 @@
1
# Re-export the SDK client class; mendable/firecrawl.py defines FireCrawl
# (it has no ChatApp, so the previous import failed at package import time).
from .firecrawl import FireCrawl
mendable/firecrawl.py ADDED
@@ -0,0 +1,78 @@
1
+ import os
2
+ import requests
3
+
4
class FireCrawl:
    """Small client for the Firecrawl v0 HTTP API (scrape and crawl endpoints).

    Every request is sent with a JSON content type and a bearer token built
    from the API key.
    """

    def __init__(self, api_key=None):
        """Store the API key, falling back to the FIRECRAWL_API_KEY env var.

        Args:
            api_key: Key used for the ``Authorization: Bearer`` header. When
                falsy, the ``FIRECRAWL_API_KEY`` environment variable is used.

        Raises:
            ValueError: If no non-empty key is available from either source.
        """
        self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
        # Reject empty strings as well as None: an empty bearer token can
        # never authenticate, so fail early with a clear message.
        if not self.api_key:
            raise ValueError('No API key provided')

    def scrape_url(self, url, params=None):
        """POST ``url`` to the scrape endpoint and return the decoded JSON body.

        Args:
            url: The URL to scrape.
            params: Optional mapping of extra fields merged into the JSON
                payload (a ``url`` entry here overrides the positional one).

        Returns:
            The decoded JSON response when the status code is 200.

        Raises:
            Exception: On any non-200 status. For 402, 409 and 500 the
                message includes the ``error`` field of the response body
                when one can be decoded.
        """
        response = self._post_request(
            'https://api.firecrawl.dev/v0/scrape',
            {'url': url, **(params or {})},
            self._prepare_headers(),
        )
        if response.status_code == 200:
            return response.json()
        if response.status_code in [402, 409, 500]:
            error_message = self._error_message(response)
            raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
        raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')

    def crawl_url(self, url, params=None):
        """Start a crawl job for ``url`` and block until it completes.

        Args:
            url: The URL to crawl.
            params: Optional mapping of extra fields merged into the JSON
                payload.

        Returns:
            The status payload returned once the job reports ``completed``.

        Raises:
            Exception: If the job cannot be started, the response carries no
                ``jobId``, a status check fails, or the job ends in a state
                other than ``completed``.
        """
        headers = self._prepare_headers()
        response = self._post_request(
            'https://api.firecrawl.dev/v0/crawl',
            {'url': url, **(params or {})},
            headers,
        )
        if response.status_code != 200:
            self._handle_error(response, 'start crawl job')
        job_id = response.json().get('jobId')
        # Without a job id the poll loop would query ".../status/None";
        # surface the real problem instead.
        if not job_id:
            raise Exception('Failed to start crawl job. No job ID in response')
        return self._monitor_job_status(job_id, headers)

    def check_crawl_status(self, job_id):
        """Return the decoded status payload for crawl job ``job_id``.

        Raises:
            Exception: If the status endpoint answers with a non-200 code.
        """
        response = self._get_request(
            f'https://api.firecrawl.dev/v0/crawl/status/{job_id}',
            self._prepare_headers(),
        )
        if response.status_code != 200:
            self._handle_error(response, 'check crawl status')
        return response.json()

    def _prepare_headers(self):
        """Build the JSON content-type and bearer-token headers."""
        return {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }

    def _post_request(self, url, data, headers):
        """POST ``data`` as JSON to ``url`` and return the response object."""
        return requests.post(url, headers=headers, json=data)

    def _get_request(self, url, headers):
        """GET ``url`` and return the response object."""
        return requests.get(url, headers=headers)

    def _monitor_job_status(self, job_id, headers, poll_interval=2):
        """Poll the crawl status endpoint until the job completes.

        Args:
            job_id: Identifier of the crawl job to poll.
            headers: Request headers to send with each status check.
            poll_interval: Seconds to sleep between checks while the job is
                still in progress (defaults to the original 2 seconds).

        Returns:
            The status payload whose ``status`` is ``completed``.

        Raises:
            Exception: If a status check returns a non-200 code, or the job
                reports a status that is neither completed nor in progress.
        """
        import time  # kept local: the module's top-level imports do not include it

        in_progress = ('active', 'paused', 'pending', 'queued')
        status_url = f'https://api.firecrawl.dev/v0/crawl/status/{job_id}'
        while True:
            status_response = self._get_request(status_url, headers)
            if status_response.status_code != 200:
                self._handle_error(status_response, 'check crawl status')
            status_data = status_response.json()
            # .get() so a payload without "status" yields the descriptive
            # failure below rather than a bare KeyError.
            status = status_data.get('status')
            if status == 'completed':
                return status_data
            if status not in in_progress:
                raise Exception(f'Crawl job failed or was stopped. Status: {status}')
            time.sleep(poll_interval)

    def _error_message(self, response):
        """Extract the ``error`` field from a response body, tolerating non-JSON.

        Returns 'Unknown error occurred' when the body cannot be decoded as
        JSON, is not a JSON object, or has no ``error`` key.
        """
        try:
            body = response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; a non-JSON error
            # page must not hide the HTTP failure being reported.
            return 'Unknown error occurred'
        if isinstance(body, dict):
            return body.get('error', 'Unknown error occurred')
        return 'Unknown error occurred'

    def _handle_error(self, response, action):
        """Raise an Exception describing a failed request; never returns.

        Args:
            response: The non-successful response object.
            action: Short description of the attempted operation, used in
                the exception message.

        Raises:
            Exception: Always. For 402, 409 and 500 the message includes the
                server-provided error text when available.
        """
        if response.status_code in [402, 409, 500]:
            error_message = self._error_message(response)
            raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')
        raise Exception(f'Unexpected error occurred while trying to {action}. Status code: {response.status_code}')