firecrawl 2.16.5__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic; consult the package registry's advisory page for more details.

Files changed (82)
  1. firecrawl/__init__.py +27 -19
  2. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  10. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  11. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  13. firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  14. firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  15. firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  16. firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  17. firecrawl/__tests__/e2e/v2/test_search.py +265 -0
  18. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  19. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  20. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  21. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  28. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  29. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  30. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  31. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  32. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  33. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  34. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  35. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
  36. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  37. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  38. firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  39. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  40. firecrawl/client.py +241 -0
  41. firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
  42. firecrawl/types.py +157 -0
  43. firecrawl/v1/__init__.py +14 -0
  44. firecrawl/v1/client.py +4653 -0
  45. firecrawl/v2/__init__.py +4 -0
  46. firecrawl/v2/client.py +802 -0
  47. firecrawl/v2/client_async.py +250 -0
  48. firecrawl/v2/methods/aio/__init__.py +1 -0
  49. firecrawl/v2/methods/aio/batch.py +85 -0
  50. firecrawl/v2/methods/aio/crawl.py +174 -0
  51. firecrawl/v2/methods/aio/extract.py +126 -0
  52. firecrawl/v2/methods/aio/map.py +59 -0
  53. firecrawl/v2/methods/aio/scrape.py +36 -0
  54. firecrawl/v2/methods/aio/search.py +58 -0
  55. firecrawl/v2/methods/aio/usage.py +42 -0
  56. firecrawl/v2/methods/batch.py +420 -0
  57. firecrawl/v2/methods/crawl.py +468 -0
  58. firecrawl/v2/methods/extract.py +131 -0
  59. firecrawl/v2/methods/map.py +77 -0
  60. firecrawl/v2/methods/scrape.py +68 -0
  61. firecrawl/v2/methods/search.py +173 -0
  62. firecrawl/v2/methods/usage.py +41 -0
  63. firecrawl/v2/types.py +546 -0
  64. firecrawl/v2/utils/__init__.py +9 -0
  65. firecrawl/v2/utils/error_handler.py +107 -0
  66. firecrawl/v2/utils/get_version.py +15 -0
  67. firecrawl/v2/utils/http_client.py +153 -0
  68. firecrawl/v2/utils/http_client_async.py +64 -0
  69. firecrawl/v2/utils/validation.py +324 -0
  70. firecrawl/v2/watcher.py +312 -0
  71. firecrawl/v2/watcher_async.py +245 -0
  72. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
  73. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
  74. firecrawl-3.0.3.dist-info/RECORD +78 -0
  75. tests/test_timeout_conversion.py +117 -0
  76. firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  77. firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  78. firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  79. firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
  80. firecrawl-2.16.5.dist-info/RECORD +0 -12
  81. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
  82. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py CHANGED
@@ -1,19 +1,23 @@
1
1
  """
2
- This is the Firecrawl package.
2
+ Firecrawl Python SDK
3
3
 
4
- This package provides a Python SDK for interacting with the Firecrawl API.
5
- It includes methods to scrape URLs, perform searches, initiate and monitor crawl jobs,
6
- and check the status of these jobs.
7
-
8
- For more information visit https://github.com/firecrawl/
9
4
  """
10
5
 
11
6
  import logging
12
7
  import os
13
8
 
14
- from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
9
+ from .client import Firecrawl, AsyncFirecrawl, FirecrawlApp, AsyncFirecrawlApp
10
+ from .v2.watcher import Watcher
11
+ from .v2.watcher_async import AsyncWatcher
12
+ from .v1 import (
13
+ V1FirecrawlApp,
14
+ AsyncV1FirecrawlApp,
15
+ V1JsonConfig,
16
+ V1ScrapeOptions,
17
+ V1ChangeTrackingOptions,
18
+ )
15
19
 
16
- __version__ = "2.16.5"
20
+ __version__ = "3.0.3"
17
21
 
18
22
  # Define the logger for the Firecrawl project
19
23
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -27,17 +31,14 @@ def _configure_logger() -> None:
27
31
  format to the firecrawl logger.
28
32
  """
29
33
  try:
30
- # Create the formatter
31
34
  formatter = logging.Formatter(
32
35
  "[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
33
36
  datefmt="%Y-%m-%d %H:%M:%S",
34
37
  )
35
38
 
36
- # Create the console handler and set the formatter
37
39
  console_handler = logging.StreamHandler()
38
40
  console_handler.setFormatter(formatter)
39
41
 
40
- # Add the console handler to the firecrawl logger
41
42
  logger.addHandler(console_handler)
42
43
  except Exception as e:
43
44
  logger.error("Failed to configure logging: %s", e)
@@ -45,20 +46,15 @@ def _configure_logger() -> None:
45
46
 
46
47
  def setup_logging() -> None:
47
48
  """Set up logging based on the FIRECRAWL_LOGGING_LEVEL environment variable."""
48
- # Check if the firecrawl logger already has a handler
49
49
  if logger.hasHandlers():
50
- return # To prevent duplicate logging
50
+ return
51
51
 
52
- # Check if the FIRECRAWL_LOGGING_LEVEL environment variable is set
53
52
  if not (env := os.getenv("FIRECRAWL_LOGGING_LEVEL", "").upper()):
54
- # Attach a no-op handler to prevent warnings about no handlers
55
53
  logger.addHandler(logging.NullHandler())
56
54
  return
57
55
 
58
- # Attach the console handler to the firecrawl logger
59
56
  _configure_logger()
60
57
 
61
- # Set the logging level based on the FIRECRAWL_LOGGING_LEVEL environment variable
62
58
  if env == "DEBUG":
63
59
  logger.setLevel(logging.DEBUG)
64
60
  elif env == "INFO":
@@ -73,7 +69,19 @@ def setup_logging() -> None:
73
69
  logger.setLevel(logging.INFO)
74
70
  logger.warning("Unknown logging level: %s, defaulting to INFO", env)
75
71
 
76
-
77
- # Initialize logging configuration when the module is imported
78
72
  setup_logging()
79
73
  logger.debug("Debugging logger setup")
74
+
75
+ __all__ = [
76
+ 'Firecrawl',
77
+ 'AsyncFirecrawl',
78
+ 'FirecrawlApp',
79
+ 'AsyncFirecrawlApp',
80
+ 'Watcher',
81
+ 'AsyncWatcher',
82
+ 'V1FirecrawlApp',
83
+ 'AsyncV1FirecrawlApp',
84
+ 'V1JsonConfig',
85
+ 'V1ScrapeOptions',
86
+ 'V1ChangeTrackingOptions',
87
+ ]
@@ -0,0 +1,79 @@
1
+ import os
2
+ import asyncio
3
+ import pytest
4
+ from dotenv import load_dotenv
5
+ from firecrawl import AsyncFirecrawl
6
+
7
+
8
+ load_dotenv()
9
+
10
+ if not os.getenv("API_KEY"):
11
+ raise ValueError("API_KEY is not set")
12
+
13
+ if not os.getenv("API_URL"):
14
+ raise ValueError("API_URL is not set")
15
+
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_async_batch_start_and_status():
19
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
20
+ start = await client.start_batch_scrape([
21
+ "https://docs.firecrawl.dev",
22
+ "https://firecrawl.dev",
23
+ ], formats=["markdown"], max_concurrency=1)
24
+ job_id = start.id
25
+
26
+ deadline = asyncio.get_event_loop().time() + 240
27
+ status = await client.get_batch_scrape_status(job_id)
28
+ while status.status not in ("completed", "failed", "cancelled") and asyncio.get_event_loop().time() < deadline:
29
+ await asyncio.sleep(2)
30
+ status = await client.get_batch_scrape_status(job_id)
31
+
32
+ assert status.status in ("completed", "failed", "cancelled")
33
+
34
+
35
+ @pytest.mark.asyncio
36
+ async def test_async_batch_wait_minimal():
37
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
38
+ job = await client.batch_scrape([
39
+ "https://docs.firecrawl.dev",
40
+ "https://firecrawl.dev",
41
+ ], formats=["markdown"], poll_interval=1, timeout=120)
42
+ assert job.status in ("completed", "failed")
43
+
44
+
45
+ @pytest.mark.asyncio
46
+ async def test_async_batch_wait_with_all_params():
47
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
48
+ json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
49
+ job = await client.batch_scrape(
50
+ [
51
+ "https://docs.firecrawl.dev",
52
+ "https://firecrawl.dev",
53
+ ],
54
+ formats=[
55
+ "markdown",
56
+ {"type": "json", "prompt": "Extract page title", "schema": json_schema},
57
+ {"type": "changeTracking", "prompt": "Track changes", "modes": ["json"]},
58
+ ],
59
+ only_main_content=True,
60
+ mobile=False,
61
+ ignore_invalid_urls=True,
62
+ max_concurrency=2,
63
+ zero_data_retention=False,
64
+ poll_interval=1,
65
+ timeout=180,
66
+ )
67
+ assert job.status in ("completed", "failed")
68
+
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_async_cancel_batch():
72
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
73
+ start = await client.start_batch_scrape([
74
+ "https://docs.firecrawl.dev",
75
+ "https://firecrawl.dev",
76
+ ], formats=["markdown"], max_concurrency=1)
77
+ ok = await client.cancel_batch_scrape(start.id)
78
+ assert ok is True
79
+
@@ -0,0 +1,189 @@
1
+ import os
2
+ import asyncio
3
+ import pytest
4
+ from dotenv import load_dotenv
5
+ from firecrawl import AsyncFirecrawl
6
+ from firecrawl.v2.types import ScrapeOptions
7
+
8
+
9
+ load_dotenv()
10
+
11
+ if not os.getenv("API_KEY"):
12
+ raise ValueError("API_KEY is not set")
13
+
14
+ if not os.getenv("API_URL"):
15
+ raise ValueError("API_URL is not set")
16
+
17
+
18
+ @pytest.mark.asyncio
19
+ async def test_async_crawl_start_and_status():
20
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
21
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
22
+ job_id = start.id
23
+
24
+ deadline = asyncio.get_event_loop().time() + 180
25
+ status = await client.get_crawl_status(job_id)
26
+ while status.status not in ("completed", "failed") and asyncio.get_event_loop().time() < deadline:
27
+ await asyncio.sleep(2)
28
+ status = await client.get_crawl_status(job_id)
29
+
30
+ assert status.status in ("completed", "failed")
31
+
32
+
33
+ @pytest.mark.asyncio
34
+ async def test_async_crawl_with_all_params():
35
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
36
+ # rich scrape options including json format
37
+ json_schema = {
38
+ "type": "object",
39
+ "properties": {"title": {"type": "string"}},
40
+ "required": ["title"],
41
+ }
42
+ status = await client.crawl(
43
+ url="https://docs.firecrawl.dev",
44
+ prompt="Extract docs and blog",
45
+ include_paths=["/docs/*", "/blog/*"],
46
+ exclude_paths=["/admin/*"],
47
+ max_discovery_depth=2,
48
+ ignore_sitemap=False,
49
+ ignore_query_parameters=True,
50
+ limit=5,
51
+ crawl_entire_domain=False,
52
+ allow_external_links=True,
53
+ allow_subdomains=True,
54
+ delay=1,
55
+ max_concurrency=2,
56
+ webhook="https://example.com/hook",
57
+ scrape_options=ScrapeOptions(
58
+ formats=[
59
+ "markdown",
60
+ "rawHtml",
61
+ {"type": "json", "prompt": "Extract title", "schema": json_schema},
62
+ ],
63
+ only_main_content=True,
64
+ mobile=False,
65
+ timeout=20000,
66
+ wait_for=500,
67
+ skip_tls_verification=False,
68
+ remove_base64_images=False,
69
+ ),
70
+ zero_data_retention=False,
71
+ poll_interval=2,
72
+ timeout=180,
73
+ )
74
+ assert status.status in ("completed", "failed")
75
+
76
+
77
+ @pytest.mark.asyncio
78
+ async def test_async_start_crawl_with_options():
79
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
80
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=5, max_discovery_depth=2)
81
+ assert start.id is not None and start.url is not None
82
+
83
+
84
+ @pytest.mark.asyncio
85
+ async def test_async_start_crawl_with_prompt():
86
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
87
+ start = await client.start_crawl("https://firecrawl.dev", prompt="Extract all blog posts", limit=3)
88
+ assert start.id is not None and start.url is not None
89
+
90
+
91
+ @pytest.mark.asyncio
92
+ async def test_async_get_crawl_status_shape():
93
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
94
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
95
+ status = await client.get_crawl_status(start.id)
96
+ assert status.status in ("scraping", "completed", "failed")
97
+ assert status.completed >= 0
98
+ assert status.expires_at is not None
99
+ assert status.next is not None
100
+ assert isinstance(status.data, list)
101
+
102
+
103
+ @pytest.mark.asyncio
104
+ async def test_async_crawl_with_wait():
105
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
106
+ job = await client.crawl(url="https://docs.firecrawl.dev", limit=3, max_discovery_depth=2, poll_interval=1, timeout=120)
107
+ assert job.status in ("completed", "failed")
108
+ assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
109
+
110
+
111
+ @pytest.mark.asyncio
112
+ async def test_async_crawl_with_prompt_and_wait():
113
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
114
+ job = await client.crawl(url="https://docs.firecrawl.dev", prompt="Extract all blog posts", limit=3, poll_interval=1, timeout=120)
115
+ assert job.status in ("completed", "failed")
116
+ assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
117
+
118
+
119
+ @pytest.mark.asyncio
120
+ async def test_async_crawl_with_scrape_options():
121
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
122
+ scrape_opts = ScrapeOptions(formats=["markdown", "links"], only_main_content=False, mobile=True)
123
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
124
+ assert start.id is not None
125
+
126
+
127
+ @pytest.mark.asyncio
128
+ async def test_async_crawl_with_json_format_object():
129
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
130
+ json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
131
+ scrape_opts = ScrapeOptions(formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}])
132
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
133
+ assert start.id is not None
134
+
135
+
136
+ @pytest.mark.asyncio
137
+ async def test_async_cancel_crawl():
138
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
139
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
140
+ cancelled = await client.cancel_crawl(start.id)
141
+ assert cancelled is True
142
+
143
+
144
+ @pytest.mark.asyncio
145
+ async def test_async_get_crawl_errors_and_invalid_job():
146
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
147
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
148
+ errs = await client.get_crawl_errors(start.id)
149
+ assert hasattr(errs, "errors") and hasattr(errs, "robots_blocked")
150
+ with pytest.raises(Exception):
151
+ await client.get_crawl_errors("invalid-job-id-12345")
152
+
153
+
154
+ @pytest.mark.asyncio
155
+ async def test_async_active_crawls():
156
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
157
+ resp = await client.active_crawls()
158
+ assert hasattr(resp, "success") and hasattr(resp, "crawls")
159
+
160
+
161
+ @pytest.mark.asyncio
162
+ async def test_async_active_crawls_with_running_crawl():
163
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
164
+ start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
165
+ # fetch active crawls and assert our ID is listed
166
+ active = await client.active_crawls()
167
+ ids = [c.id for c in active.crawls]
168
+ assert start.id in ids
169
+ # cleanup
170
+ await client.cancel_crawl(start.id)
171
+
172
+
173
+ @pytest.mark.asyncio
174
+ async def test_async_crawl_params_preview():
175
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
176
+ params = await client.crawl_params_preview(
177
+ url="https://docs.firecrawl.dev",
178
+ prompt="Extract all blog posts and documentation",
179
+ )
180
+ assert params is not None
181
+ # basic sanity: at least one field should be suggested
182
+ has_any = any([
183
+ getattr(params, "limit", None) is not None,
184
+ getattr(params, "include_paths", None) is not None,
185
+ getattr(params, "max_discovery_depth", None) is not None,
186
+ ])
187
+ assert has_any
188
+
189
+
@@ -0,0 +1,38 @@
1
+ import os
2
+ import pytest
3
+ from dotenv import load_dotenv
4
+ from firecrawl import AsyncFirecrawl
5
+
6
+
7
+ load_dotenv()
8
+
9
+ if not os.getenv("API_KEY"):
10
+ raise ValueError("API_KEY is not set")
11
+
12
+ if not os.getenv("API_URL"):
13
+ raise ValueError("API_URL is not set")
14
+
15
+
16
+ @pytest.mark.asyncio
17
+ async def test_async_extract_minimal():
18
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
19
+ res = await client.extract(urls=["https://docs.firecrawl.dev"], prompt="Extract title")
20
+ assert res is not None
21
+
22
+
23
+ @pytest.mark.asyncio
24
+ async def test_async_extract_with_schema_and_options():
25
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
26
+ schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
27
+ res = await client.extract(
28
+ urls=["https://docs.firecrawl.dev"],
29
+ prompt="Extract title",
30
+ schema=schema,
31
+ system_prompt="You are a helpful extractor",
32
+ allow_external_links=False,
33
+ enable_web_search=False,
34
+ show_sources=False,
35
+ # agent={"model": "FIRE-1", "prompt": "Extract title"}, # Skipping agent test in CI
36
+ )
37
+ assert res is not None
38
+
@@ -0,0 +1,40 @@
1
+ import os
2
+ import pytest
3
+ from dotenv import load_dotenv
4
+ from firecrawl import AsyncFirecrawl
5
+
6
+
7
+ load_dotenv()
8
+
9
+ if not os.getenv("API_KEY"):
10
+ raise ValueError("API_KEY is not set")
11
+
12
+ if not os.getenv("API_URL"):
13
+ raise ValueError("API_URL is not set")
14
+
15
+
16
+ @pytest.mark.asyncio
17
+ async def test_async_map_minimal():
18
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
19
+ resp = await client.map("https://docs.firecrawl.dev")
20
+ assert hasattr(resp, "links") and isinstance(resp.links, list)
21
+ if resp.links:
22
+ first = resp.links[0]
23
+ assert hasattr(first, "url") and isinstance(first.url, str) and first.url.startswith("http")
24
+
25
+
26
+ @pytest.mark.asyncio
27
+ @pytest.mark.parametrize("sitemap", ["only", "include", "skip"])
28
+ async def test_async_map_with_all_params(sitemap):
29
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
30
+ resp = await client.map(
31
+ "https://docs.firecrawl.dev",
32
+ search="docs",
33
+ include_subdomains=True,
34
+ limit=10,
35
+ sitemap=sitemap,
36
+ timeout=15000,
37
+ )
38
+ assert hasattr(resp, "links") and isinstance(resp.links, list)
39
+ assert len(resp.links) <= 10
40
+
@@ -0,0 +1,137 @@
1
+ import os
2
+ import pytest
3
+ from dotenv import load_dotenv
4
+ from firecrawl import AsyncFirecrawl
5
+ from firecrawl.v2.types import Document
6
+
7
+
8
+ load_dotenv()
9
+
10
+ if not os.getenv("API_KEY"):
11
+ raise ValueError("API_KEY is not set")
12
+
13
+ if not os.getenv("API_URL"):
14
+ raise ValueError("API_URL is not set")
15
+
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_async_scrape_minimal():
19
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
20
+ doc = await client.scrape("https://docs.firecrawl.dev")
21
+ assert isinstance(doc, Document)
22
+ assert (
23
+ (doc.markdown and len(doc.markdown) > 0)
24
+ or (doc.html and len(doc.html) > 0)
25
+ or (doc.raw_html and len(doc.raw_html) > 0)
26
+ or (doc.links is not None)
27
+ or (doc.screenshot is not None)
28
+ or (doc.json is not None)
29
+ or (doc.summary is not None)
30
+ )
31
+
32
+
33
+ @pytest.mark.asyncio
34
+ async def test_async_scrape_with_all_params():
35
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
36
+ # Include multiple formats with configuration
37
+ json_schema = {
38
+ "type": "object",
39
+ "properties": {"title": {"type": "string"}},
40
+ "required": ["title"],
41
+ }
42
+ doc = await client.scrape(
43
+ "https://docs.firecrawl.dev",
44
+ formats=[
45
+ "markdown",
46
+ "rawHtml",
47
+ {"type": "screenshot", "full_page": False, "quality": 70},
48
+ {"type": "json", "prompt": "Extract title", "schema": json_schema},
49
+ ],
50
+ headers={"User-Agent": "E2E-AIO"},
51
+ include_tags=["main"],
52
+ exclude_tags=["nav"],
53
+ only_main_content=True,
54
+ timeout=20000,
55
+ wait_for=500,
56
+ mobile=False,
57
+ parsers=["pdf"],
58
+ actions=[],
59
+ skip_tls_verification=False,
60
+ remove_base64_images=False,
61
+ fast_mode=False,
62
+ use_mock=None,
63
+ block_ads=False,
64
+ proxy="basic",
65
+ max_age=0,
66
+ store_in_cache=False,
67
+ )
68
+ assert isinstance(doc, Document)
69
+
70
+
71
+ @pytest.mark.asyncio
72
+ async def test_async_scrape_with_options_markdown():
73
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
74
+ doc = await client.scrape(
75
+ "https://docs.firecrawl.dev",
76
+ formats=["markdown"],
77
+ only_main_content=False,
78
+ mobile=False,
79
+ )
80
+ assert isinstance(doc, Document)
81
+
82
+
83
+ @pytest.mark.asyncio
84
+ async def test_async_scrape_with_screenshot_action_viewport():
85
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
86
+ doc = await client.scrape(
87
+ "https://docs.firecrawl.dev",
88
+ formats=[{"type": "screenshot", "full_page": False, "quality": 80, "viewport": {"width": 800, "height": 600}}],
89
+ )
90
+ assert isinstance(doc, Document)
91
+
92
+
93
+ @pytest.mark.asyncio
94
+ @pytest.mark.parametrize("fmt,expect_field", [
95
+ ("markdown", "markdown"),
96
+ ("html", "html"),
97
+ ("raw_html", "raw_html"),
98
+ ("links", "links"),
99
+ ("screenshot", "screenshot"),
100
+ ("summary", "summary"),
101
+ ])
102
+ async def test_async_scrape_basic_formats(fmt, expect_field):
103
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
104
+ doc = await client.scrape("https://docs.firecrawl.dev", formats=[fmt])
105
+ assert isinstance(doc, Document)
106
+ if expect_field == "markdown":
107
+ assert doc.markdown is not None
108
+ elif expect_field == "html":
109
+ assert doc.html is not None
110
+ elif expect_field == "raw_html":
111
+ assert doc.raw_html is not None
112
+ elif expect_field == "links":
113
+ assert isinstance(doc.links, list)
114
+ elif expect_field == "screenshot":
115
+ assert doc.screenshot is not None
116
+ elif expect_field == "summary":
117
+ assert doc.summary is not None
118
+
119
+
120
+ @pytest.mark.asyncio
121
+ async def test_async_scrape_with_json_format_object():
122
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
123
+ json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
124
+ doc = await client.scrape(
125
+ "https://docs.firecrawl.dev",
126
+ formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}],
127
+ only_main_content=True,
128
+ )
129
+ assert isinstance(doc, Document)
130
+
131
+
132
+ @pytest.mark.asyncio
133
+ async def test_async_scrape_invalid_url():
134
+ client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
135
+ with pytest.raises(ValueError):
136
+ await client.scrape("")
137
+