firecrawl 2.16.5__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

Files changed (82)
  1. firecrawl/__init__.py +27 -19
  2. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  10. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  11. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  13. firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  14. firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  15. firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  16. firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  17. firecrawl/__tests__/e2e/v2/test_search.py +265 -0
  18. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  19. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  20. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  21. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  28. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  29. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  30. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  31. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  32. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  33. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  34. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  35. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
  36. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  37. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  38. firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  39. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  40. firecrawl/client.py +241 -0
  41. firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
  42. firecrawl/types.py +157 -0
  43. firecrawl/v1/__init__.py +14 -0
  44. firecrawl/v1/client.py +4653 -0
  45. firecrawl/v2/__init__.py +4 -0
  46. firecrawl/v2/client.py +802 -0
  47. firecrawl/v2/client_async.py +250 -0
  48. firecrawl/v2/methods/aio/__init__.py +1 -0
  49. firecrawl/v2/methods/aio/batch.py +85 -0
  50. firecrawl/v2/methods/aio/crawl.py +174 -0
  51. firecrawl/v2/methods/aio/extract.py +126 -0
  52. firecrawl/v2/methods/aio/map.py +59 -0
  53. firecrawl/v2/methods/aio/scrape.py +36 -0
  54. firecrawl/v2/methods/aio/search.py +58 -0
  55. firecrawl/v2/methods/aio/usage.py +42 -0
  56. firecrawl/v2/methods/batch.py +420 -0
  57. firecrawl/v2/methods/crawl.py +468 -0
  58. firecrawl/v2/methods/extract.py +131 -0
  59. firecrawl/v2/methods/map.py +77 -0
  60. firecrawl/v2/methods/scrape.py +68 -0
  61. firecrawl/v2/methods/search.py +173 -0
  62. firecrawl/v2/methods/usage.py +41 -0
  63. firecrawl/v2/types.py +546 -0
  64. firecrawl/v2/utils/__init__.py +9 -0
  65. firecrawl/v2/utils/error_handler.py +107 -0
  66. firecrawl/v2/utils/get_version.py +15 -0
  67. firecrawl/v2/utils/http_client.py +153 -0
  68. firecrawl/v2/utils/http_client_async.py +64 -0
  69. firecrawl/v2/utils/validation.py +324 -0
  70. firecrawl/v2/watcher.py +312 -0
  71. firecrawl/v2/watcher_async.py +245 -0
  72. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
  73. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
  74. firecrawl-3.0.3.dist-info/RECORD +78 -0
  75. tests/test_timeout_conversion.py +117 -0
  76. firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  77. firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  78. firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  79. firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
  80. firecrawl-2.16.5.dist-info/RECORD +0 -12
  81. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
  82. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.16.5
3
+ Version: 3.0.3
4
4
  Summary: Python SDK for Firecrawl API
5
- Home-page: https://github.com/mendableai/firecrawl
5
+ Home-page: https://github.com/firecrawl/firecrawl
6
6
  Author: Mendable.ai
7
7
  Author-email: "Mendable.ai" <nick@mendable.ai>
8
8
  Maintainer-email: "Mendable.ai" <nick@mendable.ai>
9
9
  License: MIT License
10
10
  Project-URL: Documentation, https://docs.firecrawl.dev
11
- Project-URL: Source, https://github.com/mendableai/firecrawl
12
- Project-URL: Tracker, https://github.com/mendableai/firecrawl/issues
11
+ Project-URL: Source, https://github.com/firecrawl/firecrawl
12
+ Project-URL: Tracker, https://github.com/firecrawl/firecrawl/issues
13
13
  Keywords: SDK,API,firecrawl
14
14
  Classifier: Development Status :: 5 - Production/Stable
15
15
  Classifier: Environment :: Web Environment
@@ -34,6 +34,7 @@ Requires-Python: >=3.8
34
34
  Description-Content-Type: text/markdown
35
35
  License-File: LICENSE
36
36
  Requires-Dist: requests
37
+ Requires-Dist: httpx
37
38
  Requires-Dist: python-dotenv
38
39
  Requires-Dist: websockets
39
40
  Requires-Dist: nest-asyncio
@@ -55,24 +56,25 @@ pip install firecrawl-py
55
56
  ## Usage
56
57
 
57
58
  1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
58
- 2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
59
+ 2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `Firecrawl` class.
59
60
 
60
61
  Here's an example of how to use the SDK:
61
62
 
62
63
  ```python
63
- from firecrawl import FirecrawlApp, ScrapeOptions
64
+ from firecrawl import Firecrawl
65
+ from firecrawl.types import ScrapeOptions
64
66
 
65
- app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
67
+ firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")
66
68
 
67
- # Scrape a website:
68
- data = app.scrape_url(
69
+ # Scrape a website (v2):
70
+ data = firecrawl.scrape(
69
71
  'https://firecrawl.dev',
70
72
  formats=['markdown', 'html']
71
73
  )
72
74
  print(data)
73
75
 
74
- # Crawl a website:
75
- crawl_status = app.crawl_url(
76
+ # Crawl a website (v2 waiter):
77
+ crawl_status = firecrawl.crawl(
76
78
  'https://firecrawl.dev',
77
79
  limit=100,
78
80
  scrape_options=ScrapeOptions(formats=['markdown', 'html'])
@@ -82,20 +84,20 @@ print(crawl_status)
82
84
 
83
85
  ### Scraping a URL
84
86
 
85
- To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
87
+ To scrape a single URL, use the `scrape` method. It takes the URL as a parameter and returns a document with the requested formats.
86
88
 
87
89
  ```python
88
- # Scrape a website:
89
- scrape_result = app.scrape_url('firecrawl.dev', formats=['markdown', 'html'])
90
+ # Scrape a website (v2):
91
+ scrape_result = firecrawl.scrape('https://firecrawl.dev', formats=['markdown', 'html'])
90
92
  print(scrape_result)
91
93
  ```
92
94
 
93
95
  ### Crawling a Website
94
96
 
95
- To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
97
+ To crawl a website, use the `crawl` method. It takes the starting URL and optional parameters as arguments. You can control depth, limits, formats, and more.
96
98
 
97
99
  ```python
98
- crawl_status = app.crawl_url(
100
+ crawl_status = firecrawl.crawl(
99
101
  'https://firecrawl.dev',
100
102
  limit=100,
101
103
  scrape_options=ScrapeOptions(formats=['markdown', 'html']),
@@ -108,23 +110,23 @@ print(crawl_status)
108
110
 
109
111
  <Tip>Looking for async operations? Check out the [Async Class](#async-class) section below.</Tip>
110
112
 
111
- To crawl a website asynchronously, use the `crawl_url_async` method. It returns the crawl `ID` which you can use to check the status of the crawl job. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
113
+ To enqueue a crawl asynchronously, use `start_crawl`. It returns the crawl `ID` which you can use to check the status of the crawl job.
112
114
 
113
115
  ```python
114
- crawl_status = app.async_crawl_url(
116
+ crawl_job = firecrawl.start_crawl(
115
117
  'https://firecrawl.dev',
116
118
  limit=100,
117
119
  scrape_options=ScrapeOptions(formats=['markdown', 'html']),
118
120
  )
119
- print(crawl_status)
121
+ print(crawl_job)
120
122
  ```
121
123
 
122
124
  ### Checking Crawl Status
123
125
 
124
- To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
126
+ To check the status of a crawl job, use the `get_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
125
127
 
126
128
  ```python
127
- crawl_status = app.check_crawl_status("<crawl_id>")
129
+ crawl_status = firecrawl.get_crawl_status("<crawl_id>")
128
130
  print(crawl_status)
129
131
  ```
130
132
 
@@ -133,17 +135,17 @@ print(crawl_status)
133
135
  To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.
134
136
 
135
137
  ```python
136
- cancel_crawl = app.cancel_crawl(id)
138
+ cancel_crawl = firecrawl.cancel_crawl(id)
137
139
  print(cancel_crawl)
138
140
  ```
139
141
 
140
142
  ### Map a Website
141
143
 
142
- Use `map_url` to generate a list of URLs from a website. The `params` argument let you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
144
+ Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to use the sitemap or include subdomains.
143
145
 
144
146
  ```python
145
- # Map a website:
146
- map_result = app.map_url('https://firecrawl.dev')
147
+ # Map a website (v2):
148
+ map_result = firecrawl.map('https://firecrawl.dev')
147
149
  print(map_result)
148
150
  ```
149
151
 
@@ -194,20 +196,35 @@ The SDK handles errors returned by the Firecrawl API and raises appropriate exce
194
196
 
195
197
  ## Async Class
196
198
 
197
- For async operations, you can use the `AsyncFirecrawlApp` class. Its methods are the same as the `FirecrawlApp` class, but they don't block the main thread.
199
+ For async operations, you can use the `AsyncFirecrawl` class. Its methods mirror the `Firecrawl` class, but you `await` them.
198
200
 
199
201
  ```python
200
- from firecrawl import AsyncFirecrawlApp
202
+ from firecrawl import AsyncFirecrawl
201
203
 
202
- app = AsyncFirecrawlApp(api_key="YOUR_API_KEY")
204
+ firecrawl = AsyncFirecrawl(api_key="YOUR_API_KEY")
203
205
 
204
- # Async Scrape
206
+ # Async Scrape (v2)
205
207
  async def example_scrape():
206
- scrape_result = await app.scrape_url(url="https://example.com")
208
+ scrape_result = await firecrawl.scrape(url="https://example.com")
207
209
  print(scrape_result)
208
210
 
209
- # Async Crawl
211
+ # Async Crawl (v2)
210
212
  async def example_crawl():
211
- crawl_result = await app.crawl_url(url="https://example.com")
213
+ crawl_result = await firecrawl.crawl(url="https://example.com")
212
214
  print(crawl_result)
213
215
  ```
216
+
217
+ ## v1 compatibility
218
+
219
+ For legacy code paths, v1 remains available under `firecrawl.v1` with the original method names.
220
+
221
+ ```python
222
+ from firecrawl import Firecrawl
223
+
224
+ firecrawl = Firecrawl(api_key="YOUR_API_KEY")
225
+
226
+ # v1 methods (feature-frozen)
227
+ doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html'])
228
+ crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100)
229
+ map_v1 = firecrawl.v1.map_url('https://firecrawl.dev')
230
+ ```
@@ -0,0 +1,78 @@
1
+ firecrawl/__init__.py,sha256=5w9g4kvMhD7vpl37kKrLpgrUEQ1OWfXyj4tzsvAiQyE,2191
2
+ firecrawl/client.py,sha256=VxlMrvoq288KiIMKFk7Fq22KG0DGLZQQm56vilT71pQ,11058
3
+ firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
4
+ firecrawl/types.py,sha256=yZ4iza0M1T2kxNbt-tLEOKH7o6mFKZZ11VAZGodHSq4,2734
5
+ firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
6
+ firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
7
+ firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=H9GtuwHIFdOQ958SOVThi_kvDDxcXAK_ECRh95ogonQ,3265
8
+ firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=cOssZvIwtghAtLiM1QdNLhPEwAxZ9j9umTrBUPtJjpU,9951
9
+ firecrawl/__tests__/e2e/v2/test_extract.py,sha256=HgvGiDlyWtFygiPo5EP44Dem1oWrwgRF-hfc1LfeVSU,1670
10
+ firecrawl/__tests__/e2e/v2/test_map.py,sha256=9sT-Yq8V_8c9esl_bv5hnTA9WXb2Dg81kj6M-s0484c,1618
11
+ firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=psW2nfcA_hMFpZ4msL_VJWJTMa3Sidp11ubhftbm52g,5759
12
+ firecrawl/__tests__/e2e/v2/test_search.py,sha256=MN-q82gHlm5DT2HsnAQgW1NwVbgowlFYmKW1KGJd1ig,8811
13
+ firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
14
+ firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
15
+ firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=gJv_mLzzoAYftETB2TLkrpSfB5c04kaYgkD4hQTYsIg,2639
16
+ firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=AefCZA1he1UkGv80UXtyQru-zQbESd1L4yIAdycW-Y0,7317
17
+ firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=3CNRIFzgBMcOYOLhnKcK1k5a3Gy--u08EGDkL31uieM,1199
18
+ firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=nckl1kbiEaaTdu5lm__tOoTDG-txTYwwSH3KZEvyKzc,1199
19
+ firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=b17A7advBEjxrjdait2w8GHztZeKy_P3zZ3ixm5H7xw,4453
20
+ firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=dnrRyTIzivlwe5wt5Wa0hdghZcJmNjC1l-XrAA_JZUU,7308
21
+ firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpMjvYmeMXDFzbIhnCTMhE,1014
22
+ firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
23
+ firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
24
+ firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
25
+ firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=9DbLkBg6tuMyg7ASGX_oaQmAy0VCV4oITCOrfeR2UkY,8806
26
+ firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
27
+ firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
28
+ firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
29
+ firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=14lUgFpQsiosgMKjDustBRVE0zXnHujBI76F8BC5PZ4,6072
30
+ firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=PaV_kSgzjW8A3eFBCCn1-y4WFZBR2nf84NZk4UEBPX8,8275
31
+ firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
32
+ firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
33
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
34
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=vFbitMhH92JS5AtU78KQPi6kbT2fv68i9-rBrY5hVss,2574
35
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
36
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=PdUJrR0JLWqrithAnRXwuRrnsIN2h_DTu6-xvTOn_UU,725
37
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
38
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=hFk4XgqF3aFPGFJe0ikB1uwf_0FsppNGA088OrWUXvg,2091
39
+ firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py,sha256=E26UnUhpbjG-EG0ab4WRD94AxA5IBWmIHq8ZLBOWoAA,1202
40
+ firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py,sha256=pUwuWhRbVUTbgsZn4hgZesMkTMesTv_NPmvFW--ls-Y,3815
41
+ firecrawl/__tests__/unit/v2/utils/test_validation.py,sha256=E4n4jpBhH_W7E0ikI5r8KMAKiOhbfGD3i_B8-dv3PlI,10803
42
+ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4-4rw1-xVKWmLg2BOBGxjQPnUk,9517
43
+ firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
44
+ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
45
+ firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
46
+ firecrawl/v2/client.py,sha256=P6WAzwYGLLIANTrqAM-K4EUdGWQoFsi-zCjBibbxKQw,30507
47
+ firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
48
+ firecrawl/v2/types.py,sha256=Qj4kZ05rKKUQRzzcn4xZbcUZHgt6LXdG29T0NyZTPns,17866
49
+ firecrawl/v2/watcher.py,sha256=tUPyYEGfQq93sAPDzxEbOmDTaUqW6pltKGHz2QdSGks,15063
50
+ firecrawl/v2/watcher_async.py,sha256=yw3Jp_tNvTgR697AyNPYhAIb0vL5KixUwv2oVkHaQEA,10456
51
+ firecrawl/v2/methods/batch.py,sha256=bTn9uMslVOfQdU3645kmt151t1j0suPPyNRgYM9zXHU,12165
52
+ firecrawl/v2/methods/crawl.py,sha256=xrUe2y_T7ZYmAVll45Gag7BdS-Mmd1XxbJ88hXAMSuI,15404
53
+ firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
54
+ firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
55
+ firecrawl/v2/methods/scrape.py,sha256=Sd3KNNCmSXvR17yLB72OEPeVPKk5DGM-chkm0WpYtSk,2178
56
+ firecrawl/v2/methods/search.py,sha256=Ou0R_3rO0co7BJx3XBhiTX4bXPFlFIuU8b68bzaFMes,6488
57
+ firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
58
+ firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
59
+ firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
60
+ firecrawl/v2/methods/aio/crawl.py,sha256=PEFIqZ7UtTBZSbs3fQuoxWMN68WbsTcj2AnZZvnfggk,6936
61
+ firecrawl/v2/methods/aio/extract.py,sha256=IfNr2ETqt4dR73JFzrEYI4kk5vpKnJOG0BmPEjGEoO4,4217
62
+ firecrawl/v2/methods/aio/map.py,sha256=EuT-5A0cQr_e5SBfEZ6pnl8u0JUwEEvSwhyT2N-QoKU,2326
63
+ firecrawl/v2/methods/aio/scrape.py,sha256=-VLFlE7Ma9TS0yXN4esvC3CPCic1Kq6d5Hc4j8CudGc,1586
64
+ firecrawl/v2/methods/aio/search.py,sha256=UA5KJlzOvSqc3TYEm1wnZLNVoP4SsrRwdQbXGe7UcDk,2781
65
+ firecrawl/v2/methods/aio/usage.py,sha256=OtBi6X-aT09MMR2dpm3vBCm9JrJZIJLCQ8jJ3L7vie4,1606
66
+ firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
67
+ firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
68
+ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
69
+ firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg8mYA,4873
70
+ firecrawl/v2/utils/http_client_async.py,sha256=P4XG6nTz6kKH3vCPTz6i7DRhbpK4IImRGaFvQFGBFRc,1874
71
+ firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
72
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
73
+ tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
74
+ firecrawl-3.0.3.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
75
+ firecrawl-3.0.3.dist-info/METADATA,sha256=Z2T2KXXKCvSTsCgbtgkGmD-guzilPnWncCO9eZT2s50,7305
76
+ firecrawl-3.0.3.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
77
+ firecrawl-3.0.3.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
78
+ firecrawl-3.0.3.dist-info/RECORD,,
@@ -0,0 +1,117 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import os
4
+ from firecrawl import FirecrawlApp
5
+
6
+
7
+ class TestTimeoutConversion(unittest.TestCase):
8
+
9
+ @patch('requests.post')
10
+ def test_scrape_url_timeout_conversion(self, mock_post):
11
+ mock_response = MagicMock()
12
+ mock_response.status_code = 200
13
+ mock_response.json.return_value = {
14
+ 'success': True,
15
+ 'data': {
16
+ 'markdown': 'Test content'
17
+ }
18
+ }
19
+ mock_post.return_value = mock_response
20
+
21
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
22
+ app.scrape_url('https://example.com', timeout=60000)
23
+
24
+ args, kwargs = mock_post.call_args
25
+ self.assertEqual(kwargs['timeout'], 65.0)
26
+
27
+ @patch('requests.post')
28
+ def test_scrape_url_default_timeout(self, mock_post):
29
+ mock_response = MagicMock()
30
+ mock_response.status_code = 200
31
+ mock_response.json.return_value = {
32
+ 'success': True,
33
+ 'data': {
34
+ 'markdown': 'Test content'
35
+ }
36
+ }
37
+ mock_post.return_value = mock_response
38
+
39
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
40
+ app.scrape_url('https://example.com')
41
+
42
+ args, kwargs = mock_post.call_args
43
+ self.assertEqual(kwargs['timeout'], 35.0)
44
+
45
+ @patch('requests.post')
46
+ def test_post_request_timeout_conversion(self, mock_post):
47
+ mock_response = MagicMock()
48
+ mock_response.status_code = 200
49
+ mock_post.return_value = mock_response
50
+
51
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
52
+
53
+ data = {'timeout': 30000}
54
+ headers = {'Content-Type': 'application/json'}
55
+
56
+ app._post_request('https://example.com/api', data, headers)
57
+
58
+ args, kwargs = mock_post.call_args
59
+ self.assertEqual(kwargs['timeout'], 35.0)
60
+
61
+ @patch('requests.post')
62
+ def test_post_request_default_timeout(self, mock_post):
63
+ mock_response = MagicMock()
64
+ mock_response.status_code = 200
65
+ mock_post.return_value = mock_response
66
+
67
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
68
+
69
+ data = {'timeout': 30000, 'url': 'https://example.com'}
70
+ headers = {'Content-Type': 'application/json'}
71
+
72
+ app._post_request('https://example.com/api', data, headers)
73
+
74
+ args, kwargs = mock_post.call_args
75
+ self.assertEqual(kwargs['timeout'], 35.0)
76
+
77
+ @patch('requests.post')
78
+ def test_timeout_edge_cases(self, mock_post):
79
+ mock_response = MagicMock()
80
+ mock_response.status_code = 200
81
+ mock_response.json.return_value = {
82
+ 'success': True,
83
+ 'data': {
84
+ 'markdown': 'Test content'
85
+ }
86
+ }
87
+ mock_post.return_value = mock_response
88
+
89
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
90
+
91
+ app.scrape_url('https://example.com', timeout=1000)
92
+ args, kwargs = mock_post.call_args
93
+ self.assertEqual(kwargs['timeout'], 6.0)
94
+
95
+ app.scrape_url('https://example.com', timeout=0)
96
+ args, kwargs = mock_post.call_args
97
+ self.assertEqual(kwargs['timeout'], 5.0)
98
+
99
+ @patch('requests.post')
100
+ def test_post_request_no_timeout_key(self, mock_post):
101
+ mock_response = MagicMock()
102
+ mock_response.status_code = 200
103
+ mock_post.return_value = mock_response
104
+
105
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
106
+
107
+ data = {'url': 'https://example.com'}
108
+ headers = {'Content-Type': 'application/json'}
109
+
110
+ app._post_request('https://example.com/api', data, headers)
111
+
112
+ args, kwargs = mock_post.call_args
113
+ self.assertIsNone(kwargs['timeout'])
114
+
115
+
116
+ if __name__ == '__main__':
117
+ unittest.main()
File without changes
@@ -1,170 +0,0 @@
1
- import importlib.util
2
- import pytest
3
- import time
4
- import os
5
- from uuid import uuid4
6
- from dotenv import load_dotenv
7
-
8
- load_dotenv()
9
-
10
- API_URL = "http://127.0.0.1:3002"
11
- ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
12
- TEST_API_KEY = os.getenv('TEST_API_KEY')
13
-
14
- print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
15
-
16
- spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
17
- firecrawl = importlib.util.module_from_spec(spec)
18
- spec.loader.exec_module(firecrawl)
19
- FirecrawlApp = firecrawl.FirecrawlApp
20
-
21
- def test_no_api_key():
22
- with pytest.raises(Exception) as excinfo:
23
- invalid_app = FirecrawlApp(api_url=API_URL, version='v0')
24
- assert "No API key provided" in str(excinfo.value)
25
-
26
- def test_scrape_url_invalid_api_key():
27
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
28
- with pytest.raises(Exception) as excinfo:
29
- invalid_app.scrape_url('https://firecrawl.dev')
30
- assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
31
-
32
- # def test_blocklisted_url():
33
- # blocklisted_url = "https://facebook.com/fake-test"
34
- # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
35
- # with pytest.raises(Exception) as excinfo:
36
- # app.scrape_url(blocklisted_url)
37
- # assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
38
-
39
- def test_successful_response_with_valid_preview_token():
40
- app = FirecrawlApp(api_url=API_URL, api_key=os.getenv('PREVIEW_TOKEN'), version='v0')
41
- response = app.scrape_url('https://roastmywebsite.ai')
42
- assert response is not None
43
- assert 'content' in response
44
- assert "_Roast_" in response['content']
45
-
46
- def test_scrape_url_e2e():
47
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
48
- response = app.scrape_url('https://roastmywebsite.ai')
49
- print(response)
50
-
51
- assert response is not None
52
- assert 'content' in response
53
- assert 'markdown' in response
54
- assert 'metadata' in response
55
- assert 'html' not in response
56
- assert "_Roast_" in response['content']
57
-
58
- def test_successful_response_with_valid_api_key_and_include_html():
59
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
60
- response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
61
- assert response is not None
62
- assert 'content' in response
63
- assert 'markdown' in response
64
- assert 'html' in response
65
- assert 'metadata' in response
66
- assert "_Roast_" in response['content']
67
- assert "_Roast_" in response['markdown']
68
- assert "<h1" in response['html']
69
-
70
- def test_successful_response_for_valid_scrape_with_pdf_file():
71
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
72
- response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
73
- assert response is not None
74
- assert 'content' in response
75
- assert 'metadata' in response
76
- assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
77
-
78
- def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
79
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
80
- response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
81
- time.sleep(6) # wait for 6 seconds
82
- assert response is not None
83
- assert 'content' in response
84
- assert 'metadata' in response
85
- assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
86
-
87
- def test_crawl_url_invalid_api_key():
88
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
89
- with pytest.raises(Exception) as excinfo:
90
- invalid_app.crawl_url('https://firecrawl.dev')
91
- assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
92
-
93
- # def test_should_return_error_for_blocklisted_url():
94
- # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
95
- # blocklisted_url = "https://twitter.com/fake-test"
96
- # with pytest.raises(Exception) as excinfo:
97
- # app.crawl_url(blocklisted_url)
98
- # assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
99
-
100
- def test_crawl_url_wait_for_completion_e2e():
101
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
102
- response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
103
- assert response is not None
104
- assert len(response) > 0
105
- assert 'content' in response[0]
106
- assert "_Roast_" in response[0]['content']
107
-
108
- def test_crawl_url_with_idempotency_key_e2e():
109
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
110
- uniqueIdempotencyKey = str(uuid4())
111
- response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
112
- assert response is not None
113
- assert len(response) > 0
114
- assert 'content' in response[0]
115
- assert "_Roast_" in response[0]['content']
116
-
117
- with pytest.raises(Exception) as excinfo:
118
- app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
119
- assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)
120
-
121
- def test_check_crawl_status_e2e():
122
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
123
- response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
124
- assert response is not None
125
- assert 'jobId' in response
126
-
127
- time.sleep(30) # wait for 30 seconds
128
- status_response = app.check_crawl_status(response['jobId'])
129
- assert status_response is not None
130
- assert 'status' in status_response
131
- assert status_response['status'] == 'completed'
132
- assert 'data' in status_response
133
- assert len(status_response['data']) > 0
134
-
135
- def test_search_e2e():
136
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
137
- response = app.search("test query")
138
- assert response is not None
139
- assert 'content' in response[0]
140
- assert len(response) > 2
141
-
142
- def test_search_invalid_api_key():
143
- invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
144
- with pytest.raises(Exception) as excinfo:
145
- invalid_app.search("test query")
146
- assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
147
-
148
- def test_llm_extraction():
149
- app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
150
- response = app.scrape_url("https://firecrawl.dev", {
151
- 'extractorOptions': {
152
- 'mode': 'llm-extraction',
153
- 'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
154
- 'extractionSchema': {
155
- 'type': 'object',
156
- 'properties': {
157
- 'company_mission': {'type': 'string'},
158
- 'supports_sso': {'type': 'boolean'},
159
- 'is_open_source': {'type': 'boolean'}
160
- },
161
- 'required': ['company_mission', 'supports_sso', 'is_open_source']
162
- }
163
- }
164
- })
165
- assert response is not None
166
- assert 'llm_extraction' in response
167
- llm_extraction = response['llm_extraction']
168
- assert 'company_mission' in llm_extraction
169
- assert isinstance(llm_extraction['supports_sso'], bool)
170
- assert isinstance(llm_extraction['is_open_source'], bool)
File without changes