firecrawl 1.6.8__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-1.6.8 → firecrawl-1.7.0}/PKG-INFO +2 -1
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/__init__.py +1 -1
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/__tests__/e2e_withAuth/test.py +12 -12
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/__tests__/v1/e2e_withAuth/test.py +57 -35
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/firecrawl.py +16 -13
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl.egg-info/PKG-INFO +2 -1
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl.egg-info/requires.txt +1 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/pyproject.toml +2 -1
- {firecrawl-1.6.8 → firecrawl-1.7.0}/LICENSE +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/README.md +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/setup.cfg +0 -0
- {firecrawl-1.6.8 → firecrawl-1.7.0}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 1.6.8
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -37,6 +37,7 @@ Requires-Dist: requests
|
|
|
37
37
|
Requires-Dist: python-dotenv
|
|
38
38
|
Requires-Dist: websockets
|
|
39
39
|
Requires-Dist: nest-asyncio
|
|
40
|
+
Requires-Dist: pydantic>=2.10.3
|
|
40
41
|
|
|
41
42
|
# Firecrawl Python SDK
|
|
42
43
|
|
|
@@ -29,12 +29,12 @@ def test_scrape_url_invalid_api_key():
|
|
|
29
29
|
invalid_app.scrape_url('https://firecrawl.dev')
|
|
30
30
|
assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
|
|
31
31
|
|
|
32
|
-
def test_blocklisted_url():
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
32
|
+
# def test_blocklisted_url():
|
|
33
|
+
# blocklisted_url = "https://facebook.com/fake-test"
|
|
34
|
+
# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
|
|
35
|
+
# with pytest.raises(Exception) as excinfo:
|
|
36
|
+
# app.scrape_url(blocklisted_url)
|
|
37
|
+
# assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
|
|
38
38
|
|
|
39
39
|
def test_successful_response_with_valid_preview_token():
|
|
40
40
|
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token", version='v0')
|
|
@@ -90,12 +90,12 @@ def test_crawl_url_invalid_api_key():
|
|
|
90
90
|
invalid_app.crawl_url('https://firecrawl.dev')
|
|
91
91
|
assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
|
|
92
92
|
|
|
93
|
-
def test_should_return_error_for_blocklisted_url():
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
93
|
+
# def test_should_return_error_for_blocklisted_url():
|
|
94
|
+
# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
|
|
95
|
+
# blocklisted_url = "https://twitter.com/fake-test"
|
|
96
|
+
# with pytest.raises(Exception) as excinfo:
|
|
97
|
+
# app.crawl_url(blocklisted_url)
|
|
98
|
+
# assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
|
|
99
99
|
|
|
100
100
|
def test_crawl_url_wait_for_completion_e2e():
|
|
101
101
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
|
|
@@ -8,7 +8,7 @@ from datetime import datetime
|
|
|
8
8
|
|
|
9
9
|
load_dotenv()
|
|
10
10
|
|
|
11
|
-
API_URL =
|
|
11
|
+
API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002')
|
|
12
12
|
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
|
|
13
13
|
TEST_API_KEY = os.getenv('TEST_API_KEY')
|
|
14
14
|
|
|
@@ -20,22 +20,33 @@ spec.loader.exec_module(firecrawl)
|
|
|
20
20
|
FirecrawlApp = firecrawl.FirecrawlApp
|
|
21
21
|
|
|
22
22
|
def test_no_api_key():
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
if 'api.firecrawl.dev' in API_URL:
|
|
24
|
+
with pytest.raises(Exception) as excinfo:
|
|
25
|
+
invalid_app = FirecrawlApp(api_url=API_URL)
|
|
26
|
+
assert "No API key provided" in str(excinfo.value)
|
|
27
|
+
else:
|
|
28
|
+
# Should not raise error for self-hosted
|
|
29
|
+
app = FirecrawlApp(api_url=API_URL)
|
|
30
|
+
assert app is not None
|
|
26
31
|
|
|
27
32
|
def test_scrape_url_invalid_api_key():
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
33
|
+
if 'api.firecrawl.dev' in API_URL:
|
|
34
|
+
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
|
35
|
+
with pytest.raises(Exception) as excinfo:
|
|
36
|
+
invalid_app.scrape_url('https://firecrawl.dev')
|
|
37
|
+
assert "Unauthorized: Invalid token" in str(excinfo.value)
|
|
38
|
+
else:
|
|
39
|
+
# Should work without API key for self-hosted
|
|
40
|
+
app = FirecrawlApp(api_url=API_URL)
|
|
41
|
+
response = app.scrape_url('https://firecrawl.dev')
|
|
42
|
+
assert response is not None
|
|
32
43
|
|
|
33
|
-
def test_blocklisted_url():
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
44
|
+
# def test_blocklisted_url():
|
|
45
|
+
# blocklisted_url = "https://facebook.com/fake-test"
|
|
46
|
+
# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
47
|
+
# with pytest.raises(Exception) as excinfo:
|
|
48
|
+
# app.scrape_url(blocklisted_url)
|
|
49
|
+
# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
|
|
39
50
|
|
|
40
51
|
def test_successful_response_with_valid_preview_token():
|
|
41
52
|
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
|
@@ -131,17 +142,23 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext
|
|
|
131
142
|
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown']
|
|
132
143
|
|
|
133
144
|
def test_crawl_url_invalid_api_key():
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
145
|
+
if 'api.firecrawl.dev' in API_URL:
|
|
146
|
+
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
|
147
|
+
with pytest.raises(Exception) as excinfo:
|
|
148
|
+
invalid_app.crawl_url('https://firecrawl.dev')
|
|
149
|
+
assert "Unauthorized: Invalid token" in str(excinfo.value)
|
|
150
|
+
else:
|
|
151
|
+
# Should work without API key for self-hosted
|
|
152
|
+
app = FirecrawlApp(api_url=API_URL)
|
|
153
|
+
response = app.crawl_url('https://firecrawl.dev')
|
|
154
|
+
assert response is not None
|
|
138
155
|
|
|
139
|
-
def test_should_return_error_for_blocklisted_url():
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
156
|
+
# def test_should_return_error_for_blocklisted_url():
|
|
157
|
+
# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
158
|
+
# blocklisted_url = "https://twitter.com/fake-test"
|
|
159
|
+
# with pytest.raises(Exception) as excinfo:
|
|
160
|
+
# app.crawl_url(blocklisted_url)
|
|
161
|
+
# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
|
|
145
162
|
|
|
146
163
|
def test_crawl_url_wait_for_completion_e2e():
|
|
147
164
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
@@ -291,17 +308,23 @@ def test_check_crawl_status_e2e():
|
|
|
291
308
|
assert 'error' not in status_response['data'][0]['metadata']
|
|
292
309
|
|
|
293
310
|
def test_invalid_api_key_on_map():
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
311
|
+
if 'api.firecrawl.dev' in API_URL:
|
|
312
|
+
invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL)
|
|
313
|
+
with pytest.raises(Exception) as excinfo:
|
|
314
|
+
invalid_app.map_url('https://roastmywebsite.ai')
|
|
315
|
+
assert "Unauthorized: Invalid token" in str(excinfo.value)
|
|
316
|
+
else:
|
|
317
|
+
# Should work without API key for self-hosted
|
|
318
|
+
app = FirecrawlApp(api_url=API_URL)
|
|
319
|
+
response = app.map_url('https://roastmywebsite.ai')
|
|
320
|
+
assert response is not None
|
|
298
321
|
|
|
299
|
-
def test_blocklisted_url_on_map():
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
322
|
+
# def test_blocklisted_url_on_map():
|
|
323
|
+
# app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL)
|
|
324
|
+
# blocklisted_url = "https://facebook.com/fake-test"
|
|
325
|
+
# with pytest.raises(Exception) as excinfo:
|
|
326
|
+
# app.map_url(blocklisted_url)
|
|
327
|
+
# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
|
|
305
328
|
|
|
306
329
|
def test_successful_response_with_valid_preview_token_on_map():
|
|
307
330
|
app = FirecrawlApp(api_key="this_is_just_a_preview_token", api_url=API_URL)
|
|
@@ -349,4 +372,3 @@ def test_search_e2e():
|
|
|
349
372
|
# assert isinstance(llm_extraction['is_open_source'], bool)
|
|
350
373
|
|
|
351
374
|
|
|
352
|
-
|
|
@@ -40,19 +40,22 @@ class FirecrawlApp:
|
|
|
40
40
|
error: Optional[str] = None
|
|
41
41
|
|
|
42
42
|
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
43
|
+
"""
|
|
44
|
+
Initialize the FirecrawlApp instance with API key, API URL.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
api_key (Optional[str]): API key for authenticating with the Firecrawl API.
|
|
48
|
+
api_url (Optional[str]): Base URL for the Firecrawl API.
|
|
49
|
+
"""
|
|
50
|
+
self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
|
|
51
|
+
self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
|
|
52
|
+
|
|
53
|
+
# Only require API key when using cloud service
|
|
54
|
+
if 'api.firecrawl.dev' in self.api_url and self.api_key is None:
|
|
55
|
+
logger.warning("No API key provided for cloud service")
|
|
56
|
+
raise ValueError('No API key provided')
|
|
57
|
+
|
|
58
|
+
logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}")
|
|
56
59
|
|
|
57
60
|
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
|
58
61
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 1.6.8
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -37,6 +37,7 @@ Requires-Dist: requests
|
|
|
37
37
|
Requires-Dist: python-dotenv
|
|
38
38
|
Requires-Dist: websockets
|
|
39
39
|
Requires-Dist: nest-asyncio
|
|
40
|
+
Requires-Dist: pydantic>=2.10.3
|
|
40
41
|
|
|
41
42
|
# Firecrawl Python SDK
|
|
42
43
|
|
|
@@ -12,7 +12,8 @@ dependencies = [
|
|
|
12
12
|
"requests",
|
|
13
13
|
"python-dotenv",
|
|
14
14
|
"websockets",
|
|
15
|
-
"nest-asyncio"
|
|
15
|
+
"nest-asyncio",
|
|
16
|
+
"pydantic>=2.10.3",
|
|
16
17
|
]
|
|
17
18
|
authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
|
18
19
|
maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|