firecrawl 2.5.4__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic; consult the registry's advisory for this release for more details.

Files changed (50)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/firecrawl.py +12 -0
  3. {firecrawl-2.5.4.dist-info → firecrawl-2.6.0.dist-info}/LICENSE +0 -0
  4. {firecrawl-2.5.4.dist-info → firecrawl-2.6.0.dist-info}/METADATA +1 -1
  5. firecrawl-2.6.0.dist-info/RECORD +12 -0
  6. {firecrawl-2.5.4.dist-info → firecrawl-2.6.0.dist-info}/top_level.txt +0 -2
  7. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__init__.py +0 -79
  8. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  9. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  10. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  11. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  12. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/firecrawl.py +0 -4454
  13. build/lib/build/lib/build/lib/build/lib/build/lib/build/lib/tests/test_change_tracking.py +0 -98
  14. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__init__.py +0 -79
  15. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  16. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  17. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  18. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  19. build/lib/build/lib/build/lib/build/lib/build/lib/firecrawl/firecrawl.py +0 -4454
  20. build/lib/build/lib/build/lib/build/lib/build/lib/tests/test_change_tracking.py +0 -98
  21. build/lib/build/lib/build/lib/build/lib/firecrawl/__init__.py +0 -79
  22. build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  23. build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  24. build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  25. build/lib/build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  26. build/lib/build/lib/build/lib/build/lib/firecrawl/firecrawl.py +0 -4454
  27. build/lib/build/lib/build/lib/build/lib/tests/test_change_tracking.py +0 -98
  28. build/lib/build/lib/build/lib/firecrawl/__init__.py +0 -79
  29. build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  30. build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  31. build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  32. build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  33. build/lib/build/lib/build/lib/firecrawl/firecrawl.py +0 -4454
  34. build/lib/build/lib/build/lib/tests/test_change_tracking.py +0 -98
  35. build/lib/build/lib/firecrawl/__init__.py +0 -79
  36. build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  37. build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  38. build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  39. build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  40. build/lib/build/lib/firecrawl/firecrawl.py +0 -4454
  41. build/lib/build/lib/tests/test_change_tracking.py +0 -98
  42. build/lib/firecrawl/__init__.py +0 -79
  43. build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  44. build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  45. build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  46. build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -440
  47. build/lib/firecrawl/firecrawl.py +0 -4454
  48. build/lib/tests/test_change_tracking.py +0 -98
  49. firecrawl-2.5.4.dist-info/RECORD +0 -54
  50. {firecrawl-2.5.4.dist-info → firecrawl-2.6.0.dist-info}/WHEEL +0 -0
firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
15
15
 
16
- __version__ = "2.5.4"
16
+ __version__ = "2.6.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py CHANGED
@@ -347,6 +347,7 @@ class GenerateLLMsTextParams(pydantic.BaseModel):
347
347
  """
348
348
  maxUrls: Optional[int] = 10
349
349
  showFullText: Optional[bool] = False
350
+ cache: Optional[bool] = True
350
351
  __experimental_stream: Optional[bool] = None
351
352
 
352
353
  class DeepResearchParams(pydantic.BaseModel):
@@ -1870,6 +1871,7 @@ class FirecrawlApp:
1870
1871
  *,
1871
1872
  max_urls: Optional[int] = None,
1872
1873
  show_full_text: Optional[bool] = None,
1874
+ cache: Optional[bool] = None,
1873
1875
  experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse:
1874
1876
  """
1875
1877
  Generate LLMs.txt for a given URL and poll until completion.
@@ -1878,6 +1880,7 @@ class FirecrawlApp:
1878
1880
  url (str): Target URL to generate LLMs.txt from
1879
1881
  max_urls (Optional[int]): Maximum URLs to process (default: 10)
1880
1882
  show_full_text (Optional[bool]): Include full text in output (default: False)
1883
+ cache (Optional[bool]): Whether to use cached content if available (default: True)
1881
1884
  experimental_stream (Optional[bool]): Enable experimental streaming
1882
1885
 
1883
1886
  Returns:
@@ -1893,6 +1896,7 @@ class FirecrawlApp:
1893
1896
  params = GenerateLLMsTextParams(
1894
1897
  maxUrls=max_urls,
1895
1898
  showFullText=show_full_text,
1899
+ cache=cache,
1896
1900
  __experimental_stream=experimental_stream
1897
1901
  )
1898
1902
 
@@ -1900,6 +1904,7 @@ class FirecrawlApp:
1900
1904
  url,
1901
1905
  max_urls=max_urls,
1902
1906
  show_full_text=show_full_text,
1907
+ cache=cache,
1903
1908
  experimental_stream=experimental_stream
1904
1909
  )
1905
1910
 
@@ -1935,6 +1940,7 @@ class FirecrawlApp:
1935
1940
  *,
1936
1941
  max_urls: Optional[int] = None,
1937
1942
  show_full_text: Optional[bool] = None,
1943
+ cache: Optional[bool] = None,
1938
1944
  experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
1939
1945
  """
1940
1946
  Initiate an asynchronous LLMs.txt generation operation.
@@ -1943,6 +1949,7 @@ class FirecrawlApp:
1943
1949
  url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL.
1944
1950
  max_urls (Optional[int]): Maximum URLs to process (default: 10)
1945
1951
  show_full_text (Optional[bool]): Include full text in output (default: False)
1952
+ cache (Optional[bool]): Whether to use cached content if available (default: True)
1946
1953
  experimental_stream (Optional[bool]): Enable experimental streaming
1947
1954
 
1948
1955
  Returns:
@@ -1957,6 +1964,7 @@ class FirecrawlApp:
1957
1964
  params = GenerateLLMsTextParams(
1958
1965
  maxUrls=max_urls,
1959
1966
  showFullText=show_full_text,
1967
+ cache=cache,
1960
1968
  __experimental_stream=experimental_stream
1961
1969
  )
1962
1970
 
@@ -4001,6 +4009,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
4001
4009
  url,
4002
4010
  max_urls=max_urls,
4003
4011
  show_full_text=show_full_text,
4012
+ cache=cache,
4004
4013
  experimental_stream=experimental_stream
4005
4014
  )
4006
4015
  if not response.get('success') or 'id' not in response:
@@ -4027,6 +4036,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
4027
4036
  *,
4028
4037
  max_urls: Optional[int] = None,
4029
4038
  show_full_text: Optional[bool] = None,
4039
+ cache: Optional[bool] = None,
4030
4040
  experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
4031
4041
  """
4032
4042
  Initiate an asynchronous LLMs.txt generation job without waiting for completion.
@@ -4035,6 +4045,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
4035
4045
  url (str): Target URL to generate LLMs.txt from
4036
4046
  max_urls (Optional[int]): Maximum URLs to process (default: 10)
4037
4047
  show_full_text (Optional[bool]): Include full text in output (default: False)
4048
+ cache (Optional[bool]): Whether to use cached content if available (default: True)
4038
4049
  experimental_stream (Optional[bool]): Enable experimental streaming
4039
4050
 
4040
4051
  Returns:
@@ -4057,6 +4068,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
4057
4068
  params = GenerateLLMsTextParams(
4058
4069
  maxUrls=max_urls,
4059
4070
  showFullText=show_full_text,
4071
+ cache=cache,
4060
4072
  __experimental_stream=experimental_stream
4061
4073
  )
4062
4074
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.5.4
3
+ Version: 2.6.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -0,0 +1,12 @@
1
+ firecrawl/__init__.py,sha256=bds9ny9yl_8sXYAmfjaVd_32uf8Qm0ZPbZ2V-LGTgGQ,2612
2
+ firecrawl/firecrawl.py,sha256=Y_8gS4vBPQZ_LP8UXdZAOEMEmK0kRv88Jst2dZ3VW8c,190089
3
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
5
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
7
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
8
+ firecrawl-2.6.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
9
+ firecrawl-2.6.0.dist-info/METADATA,sha256=gOm5xIZTdtTxAIVZbjZFf5ALWIrIOGpYCFqQ9zW4PAU,7165
10
+ firecrawl-2.6.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
11
+ firecrawl-2.6.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
12
+ firecrawl-2.6.0.dist-info/RECORD,,
@@ -1,4 +1,2 @@
1
- build
2
- dist
3
1
  firecrawl
4
2
  tests
@@ -1,79 +0,0 @@
1
- """
2
- This is the Firecrawl package.
3
-
4
- This package provides a Python SDK for interacting with the Firecrawl API.
5
- It includes methods to scrape URLs, perform searches, initiate and monitor crawl jobs,
6
- and check the status of these jobs.
7
-
8
- For more information visit https://github.com/firecrawl/
9
- """
10
-
11
- import logging
12
- import os
13
-
14
- from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
15
-
16
- __version__ = "2.5.4"
17
-
18
- # Define the logger for the Firecrawl project
19
- logger: logging.Logger = logging.getLogger("firecrawl")
20
-
21
-
22
- def _configure_logger() -> None:
23
- """
24
- Configure the firecrawl logger for console output.
25
-
26
- The function attaches a handler for console output with a specific format and date
27
- format to the firecrawl logger.
28
- """
29
- try:
30
- # Create the formatter
31
- formatter = logging.Formatter(
32
- "[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
33
- datefmt="%Y-%m-%d %H:%M:%S",
34
- )
35
-
36
- # Create the console handler and set the formatter
37
- console_handler = logging.StreamHandler()
38
- console_handler.setFormatter(formatter)
39
-
40
- # Add the console handler to the firecrawl logger
41
- logger.addHandler(console_handler)
42
- except Exception as e:
43
- logger.error("Failed to configure logging: %s", e)
44
-
45
-
46
- def setup_logging() -> None:
47
- """Set up logging based on the FIRECRAWL_LOGGING_LEVEL environment variable."""
48
- # Check if the firecrawl logger already has a handler
49
- if logger.hasHandlers():
50
- return # To prevent duplicate logging
51
-
52
- # Check if the FIRECRAWL_LOGGING_LEVEL environment variable is set
53
- if not (env := os.getenv("FIRECRAWL_LOGGING_LEVEL", "").upper()):
54
- # Attach a no-op handler to prevent warnings about no handlers
55
- logger.addHandler(logging.NullHandler())
56
- return
57
-
58
- # Attach the console handler to the firecrawl logger
59
- _configure_logger()
60
-
61
- # Set the logging level based on the FIRECRAWL_LOGGING_LEVEL environment variable
62
- if env == "DEBUG":
63
- logger.setLevel(logging.DEBUG)
64
- elif env == "INFO":
65
- logger.setLevel(logging.INFO)
66
- elif env == "WARNING":
67
- logger.setLevel(logging.WARNING)
68
- elif env == "ERROR":
69
- logger.setLevel(logging.ERROR)
70
- elif env == "CRITICAL":
71
- logger.setLevel(logging.CRITICAL)
72
- else:
73
- logger.setLevel(logging.INFO)
74
- logger.warning("Unknown logging level: %s, defaulting to INFO", env)
75
-
76
-
77
- # Initialize logging configuration when the module is imported
78
- setup_logging()
79
- logger.debug("Debugging logger setup")
@@ -1,170 +0,0 @@
1
- import importlib.util
2
- import pytest
3
- import time
4
- import os
5
- from uuid import uuid4
6
- from dotenv import load_dotenv
7
-
8
- load_dotenv()
9
-
10
- API_URL = "http://127.0.0.1:3002"
11
- ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
12
- TEST_API_KEY = os.getenv('TEST_API_KEY')
13
-
14
- print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
15
-
16
- spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
17
- firecrawl = importlib.util.module_from_spec(spec)
18
- spec.loader.exec_module(firecrawl)
19
- FirecrawlApp = firecrawl.FirecrawlApp
20
-
21
- def test_no_api_key():
22
- with pytest.raises(Exception) as excinfo:
23
- invalid_app = FirecrawlApp(api_url=API_URL, version='v0')
24
- assert "No API key provided" in str(excinfo.value)
25
-
26
- def test_scrape_url_invalid_api_key():
27
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
28
- with pytest.raises(Exception) as excinfo:
29
- invalid_app.scrape_url('https://firecrawl.dev')
30
- assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
31
-
32
- # def test_blocklisted_url():
33
- # blocklisted_url = "https://facebook.com/fake-test"
34
- # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
35
- # with pytest.raises(Exception) as excinfo:
36
- # app.scrape_url(blocklisted_url)
37
- # assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
38
-
39
- def test_successful_response_with_valid_preview_token():
40
- app = FirecrawlApp(api_url=API_URL, api_key=os.getenv('PREVIEW_TOKEN'), version='v0')
41
- response = app.scrape_url('https://roastmywebsite.ai')
42
- assert response is not None
43
- assert 'content' in response
44
- assert "_Roast_" in response['content']
45
-
46
- def test_scrape_url_e2e():
47
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
48
- response = app.scrape_url('https://roastmywebsite.ai')
49
- print(response)
50
-
51
- assert response is not None
52
- assert 'content' in response
53
- assert 'markdown' in response
54
- assert 'metadata' in response
55
- assert 'html' not in response
56
- assert "_Roast_" in response['content']
57
-
58
- def test_successful_response_with_valid_api_key_and_include_html():
59
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
60
- response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
61
- assert response is not None
62
- assert 'content' in response
63
- assert 'markdown' in response
64
- assert 'html' in response
65
- assert 'metadata' in response
66
- assert "_Roast_" in response['content']
67
- assert "_Roast_" in response['markdown']
68
- assert "<h1" in response['html']
69
-
70
- def test_successful_response_for_valid_scrape_with_pdf_file():
71
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
72
- response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
73
- assert response is not None
74
- assert 'content' in response
75
- assert 'metadata' in response
76
- assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
77
-
78
- def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
79
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
80
- response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
81
- time.sleep(6) # wait for 6 seconds
82
- assert response is not None
83
- assert 'content' in response
84
- assert 'metadata' in response
85
- assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
86
-
87
- def test_crawl_url_invalid_api_key():
88
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
89
- with pytest.raises(Exception) as excinfo:
90
- invalid_app.crawl_url('https://firecrawl.dev')
91
- assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
92
-
93
- # def test_should_return_error_for_blocklisted_url():
94
- # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
95
- # blocklisted_url = "https://twitter.com/fake-test"
96
- # with pytest.raises(Exception) as excinfo:
97
- # app.crawl_url(blocklisted_url)
98
- # assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
99
-
100
- def test_crawl_url_wait_for_completion_e2e():
101
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
102
- response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
103
- assert response is not None
104
- assert len(response) > 0
105
- assert 'content' in response[0]
106
- assert "_Roast_" in response[0]['content']
107
-
108
- def test_crawl_url_with_idempotency_key_e2e():
109
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
110
- uniqueIdempotencyKey = str(uuid4())
111
- response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
112
- assert response is not None
113
- assert len(response) > 0
114
- assert 'content' in response[0]
115
- assert "_Roast_" in response[0]['content']
116
-
117
- with pytest.raises(Exception) as excinfo:
118
- app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
119
- assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)
120
-
121
- def test_check_crawl_status_e2e():
122
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
123
- response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
124
- assert response is not None
125
- assert 'jobId' in response
126
-
127
- time.sleep(30) # wait for 30 seconds
128
- status_response = app.check_crawl_status(response['jobId'])
129
- assert status_response is not None
130
- assert 'status' in status_response
131
- assert status_response['status'] == 'completed'
132
- assert 'data' in status_response
133
- assert len(status_response['data']) > 0
134
-
135
- def test_search_e2e():
136
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
137
- response = app.search("test query")
138
- assert response is not None
139
- assert 'content' in response[0]
140
- assert len(response) > 2
141
-
142
- def test_search_invalid_api_key():
143
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
144
- with pytest.raises(Exception) as excinfo:
145
- invalid_app.search("test query")
146
- assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
147
-
148
- def test_llm_extraction():
149
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
150
- response = app.scrape_url("https://firecrawl.dev", {
151
- 'extractorOptions': {
152
- 'mode': 'llm-extraction',
153
- 'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
154
- 'extractionSchema': {
155
- 'type': 'object',
156
- 'properties': {
157
- 'company_mission': {'type': 'string'},
158
- 'supports_sso': {'type': 'boolean'},
159
- 'is_open_source': {'type': 'boolean'}
160
- },
161
- 'required': ['company_mission', 'supports_sso', 'is_open_source']
162
- }
163
- }
164
- })
165
- assert response is not None
166
- assert 'llm_extraction' in response
167
- llm_extraction = response['llm_extraction']
168
- assert 'company_mission' in llm_extraction
169
- assert isinstance(llm_extraction['supports_sso'], bool)
170
- assert isinstance(llm_extraction['is_open_source'], bool)