firecrawl 1.6.8__tar.gz → 1.7.1__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.6.8
3
+ Version: 1.7.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -37,6 +37,7 @@ Requires-Dist: requests
37
37
  Requires-Dist: python-dotenv
38
38
  Requires-Dist: websockets
39
39
  Requires-Dist: nest-asyncio
40
+ Requires-Dist: pydantic>=2.10.3
40
41
 
41
42
  # Firecrawl Python SDK
42
43
 
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.6.8"
16
+ __version__ = "1.7.1"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -29,12 +29,12 @@ def test_scrape_url_invalid_api_key():
29
29
  invalid_app.scrape_url('https://firecrawl.dev')
30
30
  assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
31
31
 
32
- def test_blocklisted_url():
33
- blocklisted_url = "https://facebook.com/fake-test"
34
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
35
- with pytest.raises(Exception) as excinfo:
36
- app.scrape_url(blocklisted_url)
37
- assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
32
+ # def test_blocklisted_url():
33
+ # blocklisted_url = "https://facebook.com/fake-test"
34
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
35
+ # with pytest.raises(Exception) as excinfo:
36
+ # app.scrape_url(blocklisted_url)
37
+ # assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
38
38
 
39
39
  def test_successful_response_with_valid_preview_token():
40
40
  app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token", version='v0')
@@ -90,12 +90,12 @@ def test_crawl_url_invalid_api_key():
90
90
  invalid_app.crawl_url('https://firecrawl.dev')
91
91
  assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
92
92
 
93
- def test_should_return_error_for_blocklisted_url():
94
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
95
- blocklisted_url = "https://twitter.com/fake-test"
96
- with pytest.raises(Exception) as excinfo:
97
- app.crawl_url(blocklisted_url)
98
- assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
93
+ # def test_should_return_error_for_blocklisted_url():
94
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
95
+ # blocklisted_url = "https://twitter.com/fake-test"
96
+ # with pytest.raises(Exception) as excinfo:
97
+ # app.crawl_url(blocklisted_url)
98
+ # assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
99
99
 
100
100
  def test_crawl_url_wait_for_completion_e2e():
101
101
  app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
@@ -8,7 +8,7 @@ from datetime import datetime
8
8
 
9
9
  load_dotenv()
10
10
 
11
- API_URL = "http://127.0.0.1:3002";
11
+ API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002')
12
12
  ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
13
13
  TEST_API_KEY = os.getenv('TEST_API_KEY')
14
14
 
@@ -20,22 +20,33 @@ spec.loader.exec_module(firecrawl)
20
20
  FirecrawlApp = firecrawl.FirecrawlApp
21
21
 
22
22
  def test_no_api_key():
23
- with pytest.raises(Exception) as excinfo:
24
- invalid_app = FirecrawlApp(api_url=API_URL)
25
- assert "No API key provided" in str(excinfo.value)
23
+ if 'api.firecrawl.dev' in API_URL:
24
+ with pytest.raises(Exception) as excinfo:
25
+ invalid_app = FirecrawlApp(api_url=API_URL)
26
+ assert "No API key provided" in str(excinfo.value)
27
+ else:
28
+ # Should not raise error for self-hosted
29
+ app = FirecrawlApp(api_url=API_URL)
30
+ assert app is not None
26
31
 
27
32
  def test_scrape_url_invalid_api_key():
28
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
29
- with pytest.raises(Exception) as excinfo:
30
- invalid_app.scrape_url('https://firecrawl.dev')
31
- assert "Unauthorized: Invalid token" in str(excinfo.value)
33
+ if 'api.firecrawl.dev' in API_URL:
34
+ invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
35
+ with pytest.raises(Exception) as excinfo:
36
+ invalid_app.scrape_url('https://firecrawl.dev')
37
+ assert "Unauthorized: Invalid token" in str(excinfo.value)
38
+ else:
39
+ # Should work without API key for self-hosted
40
+ app = FirecrawlApp(api_url=API_URL)
41
+ response = app.scrape_url('https://firecrawl.dev')
42
+ assert response is not None
32
43
 
33
- def test_blocklisted_url():
34
- blocklisted_url = "https://facebook.com/fake-test"
35
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
36
- with pytest.raises(Exception) as excinfo:
37
- app.scrape_url(blocklisted_url)
38
- assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
44
+ # def test_blocklisted_url():
45
+ # blocklisted_url = "https://facebook.com/fake-test"
46
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
47
+ # with pytest.raises(Exception) as excinfo:
48
+ # app.scrape_url(blocklisted_url)
49
+ # assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
39
50
 
40
51
  def test_successful_response_with_valid_preview_token():
41
52
  app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
@@ -131,17 +142,23 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext
131
142
  assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown']
132
143
 
133
144
  def test_crawl_url_invalid_api_key():
134
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
135
- with pytest.raises(Exception) as excinfo:
136
- invalid_app.crawl_url('https://firecrawl.dev')
137
- assert "Unauthorized: Invalid token" in str(excinfo.value)
145
+ if 'api.firecrawl.dev' in API_URL:
146
+ invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
147
+ with pytest.raises(Exception) as excinfo:
148
+ invalid_app.crawl_url('https://firecrawl.dev')
149
+ assert "Unauthorized: Invalid token" in str(excinfo.value)
150
+ else:
151
+ # Should work without API key for self-hosted
152
+ app = FirecrawlApp(api_url=API_URL)
153
+ response = app.crawl_url('https://firecrawl.dev')
154
+ assert response is not None
138
155
 
139
- def test_should_return_error_for_blocklisted_url():
140
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
141
- blocklisted_url = "https://twitter.com/fake-test"
142
- with pytest.raises(Exception) as excinfo:
143
- app.crawl_url(blocklisted_url)
144
- assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
156
+ # def test_should_return_error_for_blocklisted_url():
157
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
158
+ # blocklisted_url = "https://twitter.com/fake-test"
159
+ # with pytest.raises(Exception) as excinfo:
160
+ # app.crawl_url(blocklisted_url)
161
+ # assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
145
162
 
146
163
  def test_crawl_url_wait_for_completion_e2e():
147
164
  app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
@@ -291,17 +308,23 @@ def test_check_crawl_status_e2e():
291
308
  assert 'error' not in status_response['data'][0]['metadata']
292
309
 
293
310
  def test_invalid_api_key_on_map():
294
- invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL)
295
- with pytest.raises(Exception) as excinfo:
296
- invalid_app.map_url('https://roastmywebsite.ai')
297
- assert "Unauthorized: Invalid token" in str(excinfo.value)
311
+ if 'api.firecrawl.dev' in API_URL:
312
+ invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL)
313
+ with pytest.raises(Exception) as excinfo:
314
+ invalid_app.map_url('https://roastmywebsite.ai')
315
+ assert "Unauthorized: Invalid token" in str(excinfo.value)
316
+ else:
317
+ # Should work without API key for self-hosted
318
+ app = FirecrawlApp(api_url=API_URL)
319
+ response = app.map_url('https://roastmywebsite.ai')
320
+ assert response is not None
298
321
 
299
- def test_blocklisted_url_on_map():
300
- app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL)
301
- blocklisted_url = "https://facebook.com/fake-test"
302
- with pytest.raises(Exception) as excinfo:
303
- app.map_url(blocklisted_url)
304
- assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
322
+ # def test_blocklisted_url_on_map():
323
+ # app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL)
324
+ # blocklisted_url = "https://facebook.com/fake-test"
325
+ # with pytest.raises(Exception) as excinfo:
326
+ # app.map_url(blocklisted_url)
327
+ # assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value)
305
328
 
306
329
  def test_successful_response_with_valid_preview_token_on_map():
307
330
  app = FirecrawlApp(api_key="this_is_just_a_preview_token", api_url=API_URL)
@@ -349,4 +372,3 @@ def test_search_e2e():
349
372
  # assert isinstance(llm_extraction['is_open_source'], bool)
350
373
 
351
374
 
352
-
@@ -40,19 +40,22 @@ class FirecrawlApp:
40
40
  error: Optional[str] = None
41
41
 
42
42
  def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
43
- """
44
- Initialize the FirecrawlApp instance with API key, API URL.
45
-
46
- Args:
47
- api_key (Optional[str]): API key for authenticating with the Firecrawl API.
48
- api_url (Optional[str]): Base URL for the Firecrawl API.
49
- """
50
- self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
51
- self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
52
- if self.api_key is None:
53
- logger.warning("No API key provided")
54
- raise ValueError('No API key provided')
55
- logger.debug(f"Initialized FirecrawlApp with API key: {self.api_key}")
43
+ """
44
+ Initialize the FirecrawlApp instance with API key, API URL.
45
+
46
+ Args:
47
+ api_key (Optional[str]): API key for authenticating with the Firecrawl API.
48
+ api_url (Optional[str]): Base URL for the Firecrawl API.
49
+ """
50
+ self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
51
+ self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
52
+
53
+ # Only require API key when using cloud service
54
+ if 'api.firecrawl.dev' in self.api_url and self.api_key is None:
55
+ logger.warning("No API key provided for cloud service")
56
+ raise ValueError('No API key provided')
57
+
58
+ logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}")
56
59
 
57
60
  def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
58
61
  """
@@ -469,20 +472,24 @@ class FirecrawlApp:
469
472
  if not params or not params.get('prompt'):
470
473
  raise ValueError("Prompt is required")
471
474
 
472
- if not params.get('schema'):
473
- raise ValueError("Schema is required for extraction")
475
+ schema = params.get('schema')
476
+ if schema:
477
+ if hasattr(schema, 'model_json_schema'):
478
+ # Convert Pydantic model to JSON schema
479
+ schema = schema.model_json_schema()
480
+ # Otherwise assume it's already a JSON schema dict
474
481
 
475
482
  jsonData = {'urls': urls, **params}
476
- jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None
483
+ request_data = {
484
+ **jsonData,
485
+ 'allowExternalLinks': params.get('allow_external_links', False),
486
+ 'schema': schema
487
+ }
477
488
 
478
489
  try:
479
490
  response = self._post_request(
480
491
  f'{self.api_url}/v1/extract',
481
- {
482
- **jsonData,
483
- 'allowExternalLinks': params.get('allow_external_links', False),
484
- 'schema': jsonSchema
485
- },
492
+ request_data,
486
493
  headers
487
494
  )
488
495
  if response.status_code == 200:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.6.8
3
+ Version: 1.7.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -37,6 +37,7 @@ Requires-Dist: requests
37
37
  Requires-Dist: python-dotenv
38
38
  Requires-Dist: websockets
39
39
  Requires-Dist: nest-asyncio
40
+ Requires-Dist: pydantic>=2.10.3
40
41
 
41
42
  # Firecrawl Python SDK
42
43
 
@@ -2,3 +2,4 @@ requests
2
2
  python-dotenv
3
3
  websockets
4
4
  nest-asyncio
5
+ pydantic>=2.10.3
@@ -12,7 +12,8 @@ dependencies = [
12
12
  "requests",
13
13
  "python-dotenv",
14
14
  "websockets",
15
- "nest-asyncio"
15
+ "nest-asyncio",
16
+ "pydantic>=2.10.3",
16
17
  ]
17
18
  authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
18
19
  maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
File without changes
File without changes
File without changes
File without changes