firecrawl-py 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl-py-0.0.1/PKG-INFO +7 -0
- firecrawl-py-0.0.1/README.md +156 -0
- firecrawl-py-0.0.1/firecrawl_py.egg-info/PKG-INFO +7 -0
- firecrawl-py-0.0.1/firecrawl_py.egg-info/SOURCES.txt +9 -0
- firecrawl-py-0.0.1/firecrawl_py.egg-info/dependency_links.txt +1 -0
- firecrawl-py-0.0.1/firecrawl_py.egg-info/requires.txt +1 -0
- firecrawl-py-0.0.1/firecrawl_py.egg-info/top_level.txt +1 -0
- firecrawl-py-0.0.1/mendable/__init__.py +1 -0
- firecrawl-py-0.0.1/mendable/firecrawl.py +78 -0
- firecrawl-py-0.0.1/setup.cfg +4 -0
- firecrawl-py-0.0.1/setup.py +14 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Mendable Python SDK
|
|
2
|
+
|
|
3
|
+
This is a Python SDK for Mendable.ai, which provides a Python interface to interact with Mendable.ai's API.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
To install this package, use pip:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install mendable-py
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Prerequisites
|
|
14
|
+
|
|
15
|
+
To use this package, you'll need to obtain an API key from Mendable.ai and make it available as an environment variable or set it in the constructor.
|
|
16
|
+
|
|
17
|
+
In your environment (add to .env file):
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
MENDABLE_API_KEY=your_api_key
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
OR
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
## Basic Usage
|
|
31
|
+
|
|
32
|
+
You can use this package to add sources to Mendable and ask it questions (`history` is an optional parameter):
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from mendable import ChatApp
|
|
36
|
+
|
|
37
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
38
|
+
|
|
39
|
+
my_chat_bot.add("url", "https://www.mendable.ai/")
|
|
40
|
+
|
|
41
|
+
answer = my_chat_bot.ask(question="What is Mendable?", history=[{ "prompt" : "How do I create a new project?", "response" : "You can create a new project by going to the projects page and clicking the new project button." }])
|
|
42
|
+
|
|
43
|
+
print(answer['answer']['text'])
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Here is what the `ask` method's response object looks like:
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"answer": {
|
|
51
|
+
"text": "This is how to deploy it..."
|
|
52
|
+
},
|
|
53
|
+
"message_id": 123,
|
|
54
|
+
"sources": [
|
|
55
|
+
{
|
|
56
|
+
"id": 866,
|
|
57
|
+
"content":"",
|
|
58
|
+
"link": "",
|
|
59
|
+
"relevance_score": 0.99
|
|
60
|
+
}
|
|
61
|
+
]
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
## Rate Message
|
|
67
|
+
|
|
68
|
+
This is how you can rate a message positive (1) or negative (0).
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from mendable import ChatApp
|
|
72
|
+
|
|
73
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
74
|
+
|
|
75
|
+
my_chat_bot.add("url", "https://www.mendable.ai/")
|
|
76
|
+
|
|
77
|
+
answer = my_chat_bot.ask(question="What is Mendable?", history=[{ "prompt" : "How do I create a new project?", "response" : "You can create a new project by going to the projects page and clicking the new project button." }])
|
|
78
|
+
|
|
79
|
+
message_id = answer["message_id"]
|
|
80
|
+
|
|
81
|
+
my_chat_bot.rate_message(message_id, 1)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## See all sources for project
|
|
85
|
+
|
|
86
|
+
This method lists all unique sources for a project.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from mendable import ChatApp
|
|
90
|
+
|
|
91
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
92
|
+
|
|
93
|
+
my_chat_bot.add("url", "https://www.mendable.ai/")
|
|
94
|
+
|
|
95
|
+
my_chat_bot.get_sources()
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The response object looks like this:
|
|
100
|
+
```json
|
|
101
|
+
[
|
|
102
|
+
{
|
|
103
|
+
"id": 52,
|
|
104
|
+
"source": "https://mendable.ai"
|
|
105
|
+
},
|
|
106
|
+
..
|
|
107
|
+
]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Add and Delete Indexes
|
|
111
|
+
|
|
112
|
+
You can also check/delete indexes using `get_sources` and `delete_source` functions:
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from mendable import ChatApp
|
|
116
|
+
|
|
117
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
118
|
+
|
|
119
|
+
my_chat_bot.add("url", "https://www.mendable.ai/")
|
|
120
|
+
|
|
121
|
+
my_chat_bot.get_sources()
|
|
122
|
+
|
|
123
|
+
my_chat_bot.delete_source("https://www.mendable.ai/")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Supported ingestion formats and types
|
|
127
|
+
|
|
128
|
+
- Website Crawler URL -> "website-crawler"
|
|
129
|
+
- Docusaurus site URL -> "docusaurus"
|
|
130
|
+
- GitHub Repo URL -> "github"
|
|
131
|
+
- YouTube Video URL -> "youtube"
|
|
132
|
+
- Single Website URL -> "url"
|
|
133
|
+
- Sitemap URL -> "sitemap"
|
|
134
|
+
- OpenAPI YAML URL -> "openapi"
|
|
135
|
+
|
|
136
|
+
## Start new conversation
|
|
137
|
+
|
|
138
|
+
This method starts a new conversation for a given project.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from mendable import ChatApp
|
|
142
|
+
|
|
143
|
+
my_chat_bot = ChatApp(api_key="your-api-key")
|
|
144
|
+
|
|
145
|
+
my_chat_bot.start_new_conversation()
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
This project is licensed under the terms of the MIT license.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
Please make sure to replace `your_api_key` with your actual API key and modify any part of this README according to your needs before adding it to your package.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mendable
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Re-export the SDK client. firecrawl.py defines ``FireCrawl``; importing the
# non-existent ``ChatApp`` made ``import mendable`` fail with ImportError.
from .firecrawl import FireCrawl
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import requests
|
|
3
|
+
|
|
4
|
+
class FireCrawl:
    """Minimal client for the Firecrawl v0 HTTP API.

    Every request is authenticated with a bearer token. The client can
    scrape a single URL, start a crawl job and block until it finishes, and
    check the status of an existing crawl job.
    """

    # Fallback text used when an error response carries no usable ``error``
    # field (missing key, non-JSON body, or a JSON body that is not an object).
    _UNKNOWN_ERROR = 'Unknown error occurred'

    def __init__(self, api_key=None):
        """Store the API key, falling back to ``FIRECRAWL_API_KEY``.

        Args:
            api_key: API key to use. When falsy, the ``FIRECRAWL_API_KEY``
                environment variable is consulted instead.

        Raises:
            ValueError: If no non-empty key is found in either place.
        """
        self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
        # An empty string (e.g. ``FIRECRAWL_API_KEY=``) is not a usable key,
        # so reject anything falsy rather than only ``None``.
        if not self.api_key:
            raise ValueError('No API key provided')

    def scrape_url(self, url, params=None):
        """Scrape a single URL and return the decoded JSON response.

        Args:
            url: The page to scrape.
            params: Optional mapping of extra fields merged into the JSON
                request body alongside ``url``.

        Returns:
            The parsed JSON body of a 200 response.

        Raises:
            Exception: If the API answers with any status other than 200.
        """
        response = self._post_request(
            'https://api.firecrawl.dev/v0/scrape',
            {'url': url, **(params or {})},
            self._prepare_headers(),
        )
        if response.status_code == 200:
            return response.json()
        if response.status_code in [402, 409, 500]:
            error_message = self._extract_error_message(response)
            raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
        raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')

    def crawl_url(self, url, params=None):
        """Start a crawl job for ``url`` and block until it completes.

        Args:
            url: The starting URL of the crawl.
            params: Optional mapping of extra fields merged into the JSON
                request body alongside ``url``.

        Returns:
            The status payload of the job once its status is ``completed``.

        Raises:
            Exception: If the job cannot be started, the start response has
                no ``jobId``, a status poll fails, or the job ends in a
                status other than ``completed``.
        """
        headers = self._prepare_headers()
        response = self._post_request(
            'https://api.firecrawl.dev/v0/crawl',
            {'url': url, **(params or {})},
            headers,
        )
        if response.status_code != 200:
            self._handle_error(response, 'start crawl job')
        job_id = response.json().get('jobId')
        if job_id is None:
            # Without an id the poll loop would query ``/status/None``.
            raise Exception('Failed to start crawl job. No job ID returned by the API')
        return self._monitor_job_status(job_id, headers)

    def check_crawl_status(self, job_id):
        """Return the current status payload of a crawl job.

        Args:
            job_id: Identifier of the crawl job to query.

        Returns:
            The parsed JSON body of a 200 response.

        Raises:
            Exception: If the API answers with any status other than 200.
        """
        response = self._get_request(
            f'https://api.firecrawl.dev/v0/crawl/status/{job_id}',
            self._prepare_headers(),
        )
        if response.status_code != 200:
            self._handle_error(response, 'check crawl status')
        return response.json()

    def _prepare_headers(self):
        """Build the JSON content-type and bearer-token request headers."""
        return {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }

    def _post_request(self, url, data, headers):
        """POST ``data`` as a JSON body to ``url`` and return the response."""
        return requests.post(url, headers=headers, json=data)

    def _get_request(self, url, headers):
        """GET ``url`` and return the response."""
        return requests.get(url, headers=headers)

    def _monitor_job_status(self, job_id, headers, poll_interval=2):
        """Poll a crawl job until it completes or fails.

        Args:
            job_id: Identifier of the crawl job to poll.
            headers: Request headers to send with every poll.
            poll_interval: Seconds to sleep between polls while the job is
                still in progress.

        Returns:
            The status payload once the job status is ``completed``.

        Raises:
            Exception: If a poll returns a non-200 status, or the job reports
                a status that is neither ``completed`` nor in progress.
        """
        import time

        status_url = f'https://api.firecrawl.dev/v0/crawl/status/{job_id}'
        while True:
            status_response = self._get_request(status_url, headers)
            if status_response.status_code != 200:
                # _handle_error always raises, which also ends the loop.
                self._handle_error(status_response, 'check crawl status')
            status_data = status_response.json()
            # .get() so a payload without "status" is reported as a failed
            # job instead of surfacing as a bare KeyError.
            status = status_data.get('status')
            if status == 'completed':
                return status_data
            if status in ['active', 'paused', 'pending', 'queued']:
                time.sleep(poll_interval)  # Wait before checking again
                continue
            raise Exception(f'Crawl job failed or was stopped. Status: {status}')

    def _handle_error(self, response, action):
        """Raise an ``Exception`` describing a failed API call.

        Args:
            response: The non-200 response object.
            action: Short description of the attempted operation, inserted
                into the error message.

        Raises:
            Exception: Always. For 402, 409 and 500 the message includes the
                API's ``error`` field when one can be read.
        """
        if response.status_code in [402, 409, 500]:
            error_message = self._extract_error_message(response)
            raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')
        raise Exception(f'Unexpected error occurred while trying to {action}. Status code: {response.status_code}')

    @classmethod
    def _extract_error_message(cls, response):
        """Return the ``error`` field of an error response, or a fallback.

        A body that is not valid JSON, or is not a JSON object, must not
        hide the HTTP failure behind a decoding error, so both cases yield
        the generic fallback text.
        """
        try:
            payload = response.json()
        except ValueError:
            return cls._UNKNOWN_ERROR
        if isinstance(payload, dict):
            return payload.get('error', cls._UNKNOWN_ERROR)
        return cls._UNKNOWN_ERROR
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Package metadata for the Firecrawl Python SDK.
setup(
    name='firecrawl-py',
    version='0.0.1',
    url='https://github.com/mendableai/firecrawl-py',
    author='Eric Ciarla',
    author_email='nick@mendable.ai',
    description='Python SDK for Firecrawl API',
    packages=find_packages(),
    # The SDK source uses f-strings, which need Python 3.6 or newer; declare
    # it so pip refuses to install on interpreters that cannot import it.
    python_requires='>=3.6',
    install_requires=[
        'requests',
    ],
)
|