firecrawl 2.16.5__tar.gz → 3.0.3__tar.gz

This diff shows the changes between package versions as published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of firecrawl was flagged as possibly problematic by the registry scanner.

Files changed (89)
  1. {firecrawl-2.16.5 → firecrawl-3.0.3}/LICENSE +0 -0
  2. {firecrawl-2.16.5 → firecrawl-3.0.3}/PKG-INFO +49 -32
  3. {firecrawl-2.16.5 → firecrawl-3.0.3}/README.md +44 -28
  4. {firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl/__init__.py +27 -19
  5. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  6. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  7. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  8. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  9. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  10. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
  11. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  12. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  13. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  14. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  15. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  16. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  17. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  18. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  19. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  20. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_search.py +265 -0
  21. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  22. firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  23. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  24. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  25. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  26. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  27. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  28. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  29. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  30. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  31. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  32. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  33. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  34. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  35. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  36. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  37. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  38. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
  39. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  40. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  41. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  42. firecrawl-3.0.3/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  43. firecrawl-3.0.3/firecrawl/client.py +241 -0
  44. firecrawl-2.16.5/firecrawl/firecrawl.py → firecrawl-3.0.3/firecrawl/firecrawl.backup.py +17 -15
  45. firecrawl-3.0.3/firecrawl/types.py +157 -0
  46. firecrawl-3.0.3/firecrawl/v1/__init__.py +14 -0
  47. firecrawl-3.0.3/firecrawl/v1/client.py +4653 -0
  48. firecrawl-3.0.3/firecrawl/v2/__init__.py +4 -0
  49. firecrawl-3.0.3/firecrawl/v2/client.py +802 -0
  50. firecrawl-3.0.3/firecrawl/v2/client_async.py +250 -0
  51. firecrawl-3.0.3/firecrawl/v2/methods/aio/__init__.py +1 -0
  52. firecrawl-3.0.3/firecrawl/v2/methods/aio/batch.py +85 -0
  53. firecrawl-3.0.3/firecrawl/v2/methods/aio/crawl.py +174 -0
  54. firecrawl-3.0.3/firecrawl/v2/methods/aio/extract.py +126 -0
  55. firecrawl-3.0.3/firecrawl/v2/methods/aio/map.py +59 -0
  56. firecrawl-3.0.3/firecrawl/v2/methods/aio/scrape.py +36 -0
  57. firecrawl-3.0.3/firecrawl/v2/methods/aio/search.py +58 -0
  58. firecrawl-3.0.3/firecrawl/v2/methods/aio/usage.py +42 -0
  59. firecrawl-3.0.3/firecrawl/v2/methods/batch.py +420 -0
  60. firecrawl-3.0.3/firecrawl/v2/methods/crawl.py +468 -0
  61. firecrawl-3.0.3/firecrawl/v2/methods/extract.py +131 -0
  62. firecrawl-3.0.3/firecrawl/v2/methods/map.py +77 -0
  63. firecrawl-3.0.3/firecrawl/v2/methods/scrape.py +68 -0
  64. firecrawl-3.0.3/firecrawl/v2/methods/search.py +173 -0
  65. firecrawl-3.0.3/firecrawl/v2/methods/usage.py +41 -0
  66. firecrawl-3.0.3/firecrawl/v2/types.py +546 -0
  67. firecrawl-3.0.3/firecrawl/v2/utils/__init__.py +9 -0
  68. firecrawl-3.0.3/firecrawl/v2/utils/error_handler.py +107 -0
  69. firecrawl-3.0.3/firecrawl/v2/utils/get_version.py +15 -0
  70. firecrawl-3.0.3/firecrawl/v2/utils/http_client.py +153 -0
  71. firecrawl-3.0.3/firecrawl/v2/utils/http_client_async.py +64 -0
  72. firecrawl-3.0.3/firecrawl/v2/utils/validation.py +324 -0
  73. firecrawl-3.0.3/firecrawl/v2/watcher.py +312 -0
  74. firecrawl-3.0.3/firecrawl/v2/watcher_async.py +245 -0
  75. {firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl.egg-info/PKG-INFO +49 -32
  76. firecrawl-3.0.3/firecrawl.egg-info/SOURCES.txt +82 -0
  77. {firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl.egg-info/requires.txt +1 -0
  78. {firecrawl-2.16.5 → firecrawl-3.0.3}/pyproject.toml +3 -2
  79. {firecrawl-2.16.5 → firecrawl-3.0.3}/setup.py +3 -3
  80. {firecrawl-2.16.5 → firecrawl-3.0.3}/tests/test_change_tracking.py +0 -0
  81. firecrawl-3.0.3/tests/test_timeout_conversion.py +117 -0
  82. firecrawl-2.16.5/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  83. firecrawl-2.16.5/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  84. firecrawl-2.16.5/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  85. firecrawl-2.16.5/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
  86. firecrawl-2.16.5/firecrawl.egg-info/SOURCES.txt +0 -16
  87. {firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl.egg-info/dependency_links.txt +0 -0
  88. {firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl.egg-info/top_level.txt +0 -0
  89. {firecrawl-2.16.5 → firecrawl-3.0.3}/setup.cfg +0 -0
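
Taken together, the file list tells the story of this release: the old monolithic `firecrawl/firecrawl.py` survives only as `firecrawl.backup.py`, the 2.x client is preserved under `firecrawl/v1/`, and a new v2 surface (sync and async clients, typed methods, WebSocket watchers) lands under `firecrawl/v2/`, with `firecrawl/client.py` tying the two together. As a rough orientation, a minimal sketch of the new import surface; the names are taken from the `__init__.py` diff further down, and anything beyond those exports would be an assumption:

```python
# Sketch of the 3.x import surface, based on the exports shown in the
# __init__.py diff below.
from firecrawl import Firecrawl, AsyncFirecrawl  # v2-first clients
from firecrawl import Watcher, AsyncWatcher      # v2 crawl/batch watchers
from firecrawl import V1FirecrawlApp             # frozen v1 client, kept for migration
```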
{firecrawl-2.16.5 → firecrawl-3.0.3}/PKG-INFO

@@ -1,15 +1,15 @@
 Metadata-Version: 2.1
 Name: firecrawl
-Version: 2.16.5
+Version: 3.0.3
 Summary: Python SDK for Firecrawl API
-Home-page: https://github.com/mendableai/firecrawl
+Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
 Author-email: "Mendable.ai" <nick@mendable.ai>
 Maintainer-email: "Mendable.ai" <nick@mendable.ai>
 License: MIT License
 Project-URL: Documentation, https://docs.firecrawl.dev
-Project-URL: Source, https://github.com/mendableai/firecrawl
-Project-URL: Tracker, https://github.com/mendableai/firecrawl/issues
+Project-URL: Source, https://github.com/firecrawl/firecrawl
+Project-URL: Tracker, https://github.com/firecrawl/firecrawl/issues
 Keywords: SDK,API,firecrawl
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Web Environment
@@ -34,6 +34,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: requests
+Requires-Dist: httpx
 Requires-Dist: python-dotenv
 Requires-Dist: websockets
 Requires-Dist: nest-asyncio
@@ -55,24 +56,25 @@ pip install firecrawl-py
 ## Usage
 
 1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
-2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
+2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `Firecrawl` class.
 
 Here's an example of how to use the SDK:
 
 ```python
-from firecrawl import FirecrawlApp, ScrapeOptions
+from firecrawl import Firecrawl
+from firecrawl.types import ScrapeOptions
 
-app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
+firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")
 
-# Scrape a website:
-data = app.scrape_url(
+# Scrape a website (v2):
+data = firecrawl.scrape(
     'https://firecrawl.dev',
     formats=['markdown', 'html']
 )
 print(data)
 
-# Crawl a website:
-crawl_status = app.crawl_url(
+# Crawl a website (v2 waiter):
+crawl_status = firecrawl.crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html'])
@@ -82,20 +84,20 @@ print(crawl_status)
 
 ### Scraping a URL
 
-To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
+To scrape a single URL, use the `scrape` method. It takes the URL as a parameter and returns a document with the requested formats.
 
 ```python
-# Scrape a website:
-scrape_result = app.scrape_url('firecrawl.dev', formats=['markdown', 'html'])
+# Scrape a website (v2):
+scrape_result = firecrawl.scrape('https://firecrawl.dev', formats=['markdown', 'html'])
 print(scrape_result)
 ```
 
 ### Crawling a Website
 
-To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+To crawl a website, use the `crawl` method. It takes the starting URL and optional parameters as arguments. You can control depth, limits, formats, and more.
 
 ```python
-crawl_status = app.crawl_url(
+crawl_status = firecrawl.crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html']),
@@ -108,23 +110,23 @@ print(crawl_status)
 
 <Tip>Looking for async operations? Check out the [Async Class](#async-class) section below.</Tip>
 
-To crawl a website asynchronously, use the `crawl_url_async` method. It returns the crawl `ID` which you can use to check the status of the crawl job. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+To enqueue a crawl asynchronously, use `start_crawl`. It returns the crawl `ID` which you can use to check the status of the crawl job.
 
 ```python
-crawl_status = app.async_crawl_url(
+crawl_job = firecrawl.start_crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html']),
 )
-print(crawl_status)
+print(crawl_job)
 ```
 
 ### Checking Crawl Status
 
-To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
+To check the status of a crawl job, use the `get_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
 
 ```python
-crawl_status = app.check_crawl_status("<crawl_id>")
+crawl_status = firecrawl.get_crawl_status("<crawl_id>")
 print(crawl_status)
 ```
 
@@ -133,17 +135,17 @@ print(crawl_status)
 To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.
 
 ```python
-cancel_crawl = app.cancel_crawl(id)
+cancel_crawl = firecrawl.cancel_crawl(id)
 print(cancel_crawl)
 ```
 
 ### Map a Website
 
-Use `map_url` to generate a list of URLs from a website. The `params` argument let you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
+Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to use the sitemap or include subdomains.
 
 ```python
-# Map a website:
-map_result = app.map_url('https://firecrawl.dev')
+# Map a website (v2):
+map_result = firecrawl.map('https://firecrawl.dev')
 print(map_result)
 ```
 
@@ -194,20 +196,35 @@ The SDK handles errors returned by the Firecrawl API and raises appropriate exce
 
 ## Async Class
 
-For async operations, you can use the `AsyncFirecrawlApp` class. Its methods are the same as the `FirecrawlApp` class, but they don't block the main thread.
+For async operations, you can use the `AsyncFirecrawl` class. Its methods mirror the `Firecrawl` class, but you `await` them.
 
 ```python
-from firecrawl import AsyncFirecrawlApp
+from firecrawl import AsyncFirecrawl
 
-app = AsyncFirecrawlApp(api_key="YOUR_API_KEY")
+firecrawl = AsyncFirecrawl(api_key="YOUR_API_KEY")
 
-# Async Scrape
+# Async Scrape (v2)
 async def example_scrape():
-    scrape_result = await app.scrape_url(url="https://example.com")
+    scrape_result = await firecrawl.scrape(url="https://example.com")
     print(scrape_result)
 
-# Async Crawl
+# Async Crawl (v2)
 async def example_crawl():
-    crawl_result = await app.crawl_url(url="https://example.com")
+    crawl_result = await firecrawl.crawl(url="https://example.com")
     print(crawl_result)
 ```
+
+## v1 compatibility
+
+For legacy code paths, v1 remains available under `firecrawl.v1` with the original method names.
+
+```python
+from firecrawl import Firecrawl
+
+firecrawl = Firecrawl(api_key="YOUR_API_KEY")
+
+# v1 methods (feature‑frozen)
+doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html'])
+crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100)
+map_v1 = firecrawl.v1.map_url('https://firecrawl.dev')
+```
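
Read together, the hunks above amount to a v1 → v2 rename map. A hedged sketch of the v2 calls with the old names noted in comments; only the method names come from this diff, and argument or return shapes beyond what is shown are assumptions:

```python
from firecrawl import Firecrawl

firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")

# Rename map per the README diff (v1 -> v2):
#   scrape_url         -> scrape
#   crawl_url          -> crawl            (waits for the crawl to finish)
#   async_crawl_url    -> start_crawl      (enqueue only; returns the job)
#   check_crawl_status -> get_crawl_status
#   map_url            -> map
doc = firecrawl.scrape('https://firecrawl.dev', formats=['markdown'])
links = firecrawl.map('https://firecrawl.dev')
```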
{firecrawl-2.16.5 → firecrawl-3.0.3}/README.md

@@ -13,24 +13,25 @@ pip install firecrawl-py
 ## Usage
 
 1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
-2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
+2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `Firecrawl` class.
 
 Here's an example of how to use the SDK:
 
 ```python
-from firecrawl import FirecrawlApp, ScrapeOptions
+from firecrawl import Firecrawl
+from firecrawl.types import ScrapeOptions
 
-app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
+firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")
 
-# Scrape a website:
-data = app.scrape_url(
+# Scrape a website (v2):
+data = firecrawl.scrape(
     'https://firecrawl.dev',
     formats=['markdown', 'html']
 )
 print(data)
 
-# Crawl a website:
-crawl_status = app.crawl_url(
+# Crawl a website (v2 waiter):
+crawl_status = firecrawl.crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html'])
@@ -40,20 +41,20 @@ print(crawl_status)
 
 ### Scraping a URL
 
-To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
+To scrape a single URL, use the `scrape` method. It takes the URL as a parameter and returns a document with the requested formats.
 
 ```python
-# Scrape a website:
-scrape_result = app.scrape_url('firecrawl.dev', formats=['markdown', 'html'])
+# Scrape a website (v2):
+scrape_result = firecrawl.scrape('https://firecrawl.dev', formats=['markdown', 'html'])
 print(scrape_result)
 ```
 
 ### Crawling a Website
 
-To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+To crawl a website, use the `crawl` method. It takes the starting URL and optional parameters as arguments. You can control depth, limits, formats, and more.
 
 ```python
-crawl_status = app.crawl_url(
+crawl_status = firecrawl.crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html']),
@@ -66,23 +67,23 @@ print(crawl_status)
 
 <Tip>Looking for async operations? Check out the [Async Class](#async-class) section below.</Tip>
 
-To crawl a website asynchronously, use the `crawl_url_async` method. It returns the crawl `ID` which you can use to check the status of the crawl job. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+To enqueue a crawl asynchronously, use `start_crawl`. It returns the crawl `ID` which you can use to check the status of the crawl job.
 
 ```python
-crawl_status = app.async_crawl_url(
+crawl_job = firecrawl.start_crawl(
     'https://firecrawl.dev',
     limit=100,
     scrape_options=ScrapeOptions(formats=['markdown', 'html']),
 )
-print(crawl_status)
+print(crawl_job)
 ```
 
 ### Checking Crawl Status
 
-To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
+To check the status of a crawl job, use the `get_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
 
 ```python
-crawl_status = app.check_crawl_status("<crawl_id>")
+crawl_status = firecrawl.get_crawl_status("<crawl_id>")
 print(crawl_status)
 ```
 
@@ -91,17 +92,17 @@ print(crawl_status)
 To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.
 
 ```python
-cancel_crawl = app.cancel_crawl(id)
+cancel_crawl = firecrawl.cancel_crawl(id)
 print(cancel_crawl)
 ```
 
 ### Map a Website
 
-Use `map_url` to generate a list of URLs from a website. The `params` argument let you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
+Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to use the sitemap or include subdomains.
 
 ```python
-# Map a website:
-map_result = app.map_url('https://firecrawl.dev')
+# Map a website (v2):
+map_result = firecrawl.map('https://firecrawl.dev')
 print(map_result)
 ```
 
@@ -152,20 +153,35 @@ The SDK handles errors returned by the Firecrawl API and raises appropriate exce
 
 ## Async Class
 
-For async operations, you can use the `AsyncFirecrawlApp` class. Its methods are the same as the `FirecrawlApp` class, but they don't block the main thread.
+For async operations, you can use the `AsyncFirecrawl` class. Its methods mirror the `Firecrawl` class, but you `await` them.
 
 ```python
-from firecrawl import AsyncFirecrawlApp
+from firecrawl import AsyncFirecrawl
 
-app = AsyncFirecrawlApp(api_key="YOUR_API_KEY")
+firecrawl = AsyncFirecrawl(api_key="YOUR_API_KEY")
 
-# Async Scrape
+# Async Scrape (v2)
 async def example_scrape():
-    scrape_result = await app.scrape_url(url="https://example.com")
+    scrape_result = await firecrawl.scrape(url="https://example.com")
     print(scrape_result)
 
-# Async Crawl
+# Async Crawl (v2)
 async def example_crawl():
-    crawl_result = await app.crawl_url(url="https://example.com")
+    crawl_result = await firecrawl.crawl(url="https://example.com")
     print(crawl_result)
+```
+
+## v1 compatibility
+
+For legacy code paths, v1 remains available under `firecrawl.v1` with the original method names.
+
+```python
+from firecrawl import Firecrawl
+
+firecrawl = Firecrawl(api_key="YOUR_API_KEY")
+
+# v1 methods (feature‑frozen)
+doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html'])
+crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100)
+map_v1 = firecrawl.v1.map_url('https://firecrawl.dev')
 ```
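
The README shows `start_crawl` and `get_crawl_status` only in isolation; a minimal polling loop that combines them might look like the sketch below. The `.id` attribute and the terminal status strings are assumptions borrowed from the batch-scrape e2e test at the end of this diff:

```python
import time

from firecrawl import Firecrawl

firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")

# Enqueue the crawl; per the README, this returns the crawl job rather than
# waiting for completion.
job = firecrawl.start_crawl('https://firecrawl.dev', limit=100)

# Poll until a terminal state; "completed"/"failed"/"cancelled" are assumed
# from the e2e tests shipped in this release.
status = firecrawl.get_crawl_status(job.id)
while status.status not in ("completed", "failed", "cancelled"):
    time.sleep(2)
    status = firecrawl.get_crawl_status(job.id)

print(status.status)
```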
{firecrawl-2.16.5 → firecrawl-3.0.3}/firecrawl/__init__.py

@@ -1,19 +1,23 @@
 """
-This is the Firecrawl package.
+Firecrawl Python SDK
 
-This package provides a Python SDK for interacting with the Firecrawl API.
-It includes methods to scrape URLs, perform searches, initiate and monitor crawl jobs,
-and check the status of these jobs.
-
-For more information visit https://github.com/firecrawl/
 """
 
 import logging
 import os
 
-from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
+from .client import Firecrawl, AsyncFirecrawl, FirecrawlApp, AsyncFirecrawlApp
+from .v2.watcher import Watcher
+from .v2.watcher_async import AsyncWatcher
+from .v1 import (
+    V1FirecrawlApp,
+    AsyncV1FirecrawlApp,
+    V1JsonConfig,
+    V1ScrapeOptions,
+    V1ChangeTrackingOptions,
+)
 
-__version__ = "2.16.5"
+__version__ = "3.0.3"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -27,17 +31,14 @@ def _configure_logger() -> None:
     format to the firecrawl logger.
     """
     try:
-        # Create the formatter
         formatter = logging.Formatter(
            "[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
 
-        # Create the console handler and set the formatter
         console_handler = logging.StreamHandler()
         console_handler.setFormatter(formatter)
 
-        # Add the console handler to the firecrawl logger
         logger.addHandler(console_handler)
     except Exception as e:
         logger.error("Failed to configure logging: %s", e)
@@ -45,20 +46,15 @@
 
 def setup_logging() -> None:
     """Set up logging based on the FIRECRAWL_LOGGING_LEVEL environment variable."""
-    # Check if the firecrawl logger already has a handler
     if logger.hasHandlers():
-        return  # To prevent duplicate logging
+        return
 
-    # Check if the FIRECRAWL_LOGGING_LEVEL environment variable is set
     if not (env := os.getenv("FIRECRAWL_LOGGING_LEVEL", "").upper()):
-        # Attach a no-op handler to prevent warnings about no handlers
         logger.addHandler(logging.NullHandler())
         return
 
-    # Attach the console handler to the firecrawl logger
     _configure_logger()
 
-    # Set the logging level based on the FIRECRAWL_LOGGING_LEVEL environment variable
     if env == "DEBUG":
         logger.setLevel(logging.DEBUG)
     elif env == "INFO":
@@ -73,7 +69,19 @@ def setup_logging() -> None:
         logger.setLevel(logging.INFO)
         logger.warning("Unknown logging level: %s, defaulting to INFO", env)
 
-
-# Initialize logging configuration when the module is imported
 setup_logging()
 logger.debug("Debugging logger setup")
+
+__all__ = [
+    'Firecrawl',
+    'AsyncFirecrawl',
+    'FirecrawlApp',
+    'AsyncFirecrawlApp',
+    'Watcher',
+    'AsyncWatcher',
+    'V1FirecrawlApp',
+    'AsyncV1FirecrawlApp',
+    'V1JsonConfig',
+    'V1ScrapeOptions',
+    'V1ChangeTrackingOptions',
+]
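
One practical consequence of the `__init__.py` diff: `setup_logging()` runs at import time and reads `FIRECRAWL_LOGGING_LEVEL` once (returning early if handlers already exist), so the variable must be set before the first import. A minimal sketch:

```python
import os

# setup_logging() is called when the package is imported, so the level has
# to be in the environment before the import happens.
os.environ["FIRECRAWL_LOGGING_LEVEL"] = "DEBUG"

import firecrawl  # the "Debugging logger setup" debug line is emitted here
```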
firecrawl-3.0.3/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py (new file)

@@ -0,0 +1,79 @@
+import os
+import asyncio
+import pytest
+from dotenv import load_dotenv
+from firecrawl import AsyncFirecrawl
+
+
+load_dotenv()
+
+if not os.getenv("API_KEY"):
+    raise ValueError("API_KEY is not set")
+
+if not os.getenv("API_URL"):
+    raise ValueError("API_URL is not set")
+
+
+@pytest.mark.asyncio
+async def test_async_batch_start_and_status():
+    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+    start = await client.start_batch_scrape([
+        "https://docs.firecrawl.dev",
+        "https://firecrawl.dev",
+    ], formats=["markdown"], max_concurrency=1)
+    job_id = start.id
+
+    deadline = asyncio.get_event_loop().time() + 240
+    status = await client.get_batch_scrape_status(job_id)
+    while status.status not in ("completed", "failed", "cancelled") and asyncio.get_event_loop().time() < deadline:
+        await asyncio.sleep(2)
+        status = await client.get_batch_scrape_status(job_id)
+
+    assert status.status in ("completed", "failed", "cancelled")
+
+
+@pytest.mark.asyncio
+async def test_async_batch_wait_minimal():
+    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+    job = await client.batch_scrape([
+        "https://docs.firecrawl.dev",
+        "https://firecrawl.dev",
+    ], formats=["markdown"], poll_interval=1, timeout=120)
+    assert job.status in ("completed", "failed")
+
+
+@pytest.mark.asyncio
+async def test_async_batch_wait_with_all_params():
+    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+    json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
+    job = await client.batch_scrape(
+        [
+            "https://docs.firecrawl.dev",
+            "https://firecrawl.dev",
+        ],
+        formats=[
+            "markdown",
+            {"type": "json", "prompt": "Extract page title", "schema": json_schema},
+            {"type": "changeTracking", "prompt": "Track changes", "modes": ["json"]},
+        ],
+        only_main_content=True,
+        mobile=False,
+        ignore_invalid_urls=True,
+        max_concurrency=2,
+        zero_data_retention=False,
+        poll_interval=1,
+        timeout=180,
+    )
+    assert job.status in ("completed", "failed")
+
+
+@pytest.mark.asyncio
+async def test_async_cancel_batch():
+    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+    start = await client.start_batch_scrape([
+        "https://docs.firecrawl.dev",
+        "https://firecrawl.dev",
+    ], formats=["markdown"], max_concurrency=1)
+    ok = await client.cancel_batch_scrape(start.id)
+    assert ok is True
+
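
Outside pytest, the same flow these tests exercise can be driven directly. A condensed sketch of the one-call waiter from `test_async_batch_wait_minimal`, using the same `API_KEY`/`API_URL` environment convention as the tests:

```python
import asyncio
import os

from firecrawl import AsyncFirecrawl


async def main() -> None:
    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))

    # batch_scrape polls internally until the batch reaches a terminal state
    # or the timeout expires, mirroring the e2e test above.
    job = await client.batch_scrape(
        ["https://docs.firecrawl.dev", "https://firecrawl.dev"],
        formats=["markdown"],
        poll_interval=2,
        timeout=120,
    )
    print(job.status)


asyncio.run(main())
```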