firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,234 @@
1
+ Metadata-Version: 2.4
2
+ Name: firecrawl
3
+ Version: 4.12.0
4
+ Summary: Python SDK for Firecrawl API
5
+ Home-page: https://github.com/firecrawl/firecrawl
6
+ Author: Mendable.ai
7
+ Author-email: "Mendable.ai" <nick@mendable.ai>
8
+ Maintainer-email: "Mendable.ai" <nick@mendable.ai>
9
+ License: MIT License
10
+ Project-URL: Documentation, https://docs.firecrawl.dev
11
+ Project-URL: Source, https://github.com/firecrawl/firecrawl
12
+ Project-URL: Tracker, https://github.com/firecrawl/firecrawl/issues
13
+ Keywords: SDK,API,firecrawl
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Environment :: Web Environment
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Natural Language :: English
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python
21
+ Classifier: Programming Language :: Python :: 3
22
+ Classifier: Programming Language :: Python :: 3.8
23
+ Classifier: Programming Language :: Python :: 3.9
24
+ Classifier: Programming Language :: Python :: 3.10
25
+ Classifier: Topic :: Internet
26
+ Classifier: Topic :: Internet :: WWW/HTTP
27
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
28
+ Classifier: Topic :: Software Development
29
+ Classifier: Topic :: Software Development :: Libraries
30
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
31
+ Classifier: Topic :: Text Processing
32
+ Classifier: Topic :: Text Processing :: Indexing
33
+ Requires-Python: >=3.8
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: requests
37
+ Requires-Dist: httpx
38
+ Requires-Dist: python-dotenv
39
+ Requires-Dist: websockets
40
+ Requires-Dist: nest-asyncio
41
+ Requires-Dist: pydantic>=2.0
42
+ Requires-Dist: aiohttp
43
+ Dynamic: author
44
+ Dynamic: home-page
45
+ Dynamic: license-file
46
+ Dynamic: requires-python
47
+
48
+ # Firecrawl Python SDK
49
+
50
+ The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
51
+
52
+ ## Installation
53
+
54
+ To install the Firecrawl Python SDK, you can use pip:
55
+
56
+ ```bash
57
+ pip install firecrawl-py
58
+ ```
59
+
60
+ ## Usage
61
+
62
+ 1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
63
+ 2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `Firecrawl` class.
64
+
65
+ Here's an example of how to use the SDK:
66
+
67
+ ```python
68
+ from firecrawl import Firecrawl
69
+ from firecrawl.types import ScrapeOptions
70
+
71
+ firecrawl = Firecrawl(api_key="fc-YOUR_API_KEY")
72
+
73
+ # Scrape a website (v2):
74
+ data = firecrawl.scrape(
75
+ 'https://firecrawl.dev',
76
+ formats=['markdown', 'html']
77
+ )
78
+ print(data)
79
+
80
+ # Crawl a website (v2 waiter):
81
+ crawl_status = firecrawl.crawl(
82
+ 'https://firecrawl.dev',
83
+ limit=100,
84
+ scrape_options=ScrapeOptions(formats=['markdown', 'html'])
85
+ )
86
+ print(crawl_status)
87
+ ```
88
+
89
+ ### Scraping a URL
90
+
91
+ To scrape a single URL, use the `scrape` method. It takes the URL as a parameter and returns a document with the requested formats.
92
+
93
+ ```python
94
+ # Scrape a website (v2):
95
+ scrape_result = firecrawl.scrape('https://firecrawl.dev', formats=['markdown', 'html'])
96
+ print(scrape_result)
97
+ ```
98
+
99
+ ### Crawling a Website
100
+
101
+ To crawl a website, use the `crawl` method. It takes the starting URL and optional parameters as arguments. You can control depth, limits, formats, and more.
102
+
103
+ ```python
104
+ crawl_status = firecrawl.crawl(
105
+ 'https://firecrawl.dev',
106
+ limit=100,
107
+ scrape_options=ScrapeOptions(formats=['markdown', 'html']),
108
+ poll_interval=30
109
+ )
110
+ print(crawl_status)
111
+ ```
112
+
113
+ ### Asynchronous Crawling
114
+
115
+ <Tip>Looking for async operations? Check out the [Async Class](#async-class) section below.</Tip>
116
+
117
+ To enqueue a crawl asynchronously, use `start_crawl`. It returns the crawl `ID` which you can use to check the status of the crawl job.
118
+
119
+ ```python
120
+ crawl_job = firecrawl.start_crawl(
121
+ 'https://firecrawl.dev',
122
+ limit=100,
123
+ scrape_options=ScrapeOptions(formats=['markdown', 'html']),
124
+ )
125
+ print(crawl_job)
126
+ ```
127
+
128
+ ### Checking Crawl Status
129
+
130
+ To check the status of a crawl job, use the `get_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
131
+
132
+ ```python
133
+ crawl_status = firecrawl.get_crawl_status("<crawl_id>")
134
+ print(crawl_status)
135
+ ```
136
+
137
+ ### Cancelling a Crawl
138
+
139
+ To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.
140
+
141
+ ```python
142
+ cancel_crawl = firecrawl.cancel_crawl(id)
143
+ print(cancel_crawl)
144
+ ```
145
+
146
+ ### Map a Website
147
+
148
+ Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to use the sitemap or include subdomains.
149
+
150
+ ```python
151
+ # Map a website (v2):
152
+ map_result = firecrawl.map('https://firecrawl.dev')
153
+ print(map_result)
154
+ ```
155
+
156
+ {/* ### Extracting Structured Data from Websites
157
+
158
+ To extract structured data from websites, use the `extract` method. It takes the URLs to extract data from, a prompt, and a schema as arguments. The schema is a Pydantic model that defines the structure of the extracted data.
159
+
160
+ <ExtractPythonShort /> */}
161
+
162
+ ### Crawling a Website with WebSockets
163
+
164
+ To crawl a website with WebSockets, use the `crawl_url_and_watch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
165
+
166
+ ```python
167
+ # inside an async function...
168
+ nest_asyncio.apply()
169
+
170
+ # Define event handlers
171
+ def on_document(detail):
172
+ print("DOC", detail)
173
+
174
+ def on_error(detail):
175
+ print("ERR", detail['error'])
176
+
177
+ def on_done(detail):
178
+ print("DONE", detail['status'])
179
+
180
+ # Function to start the crawl and watch process
181
+ async def start_crawl_and_watch():
182
+ # Initiate the crawl job and get the watcher
183
+ watcher = firecrawl.crawl_url_and_watch('firecrawl.dev', exclude_paths=['blog/*'], limit=5)
184
+
185
+ # Add event listeners
186
+ watcher.add_event_listener("document", on_document)
187
+ watcher.add_event_listener("error", on_error)
188
+ watcher.add_event_listener("done", on_done)
189
+
190
+ # Start the watcher
191
+ await watcher.connect()
192
+
193
+ # Run the event loop
194
+ await start_crawl_and_watch()
195
+ ```
196
+
197
+ ## Error Handling
198
+
199
+ The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
200
+
201
+ ## Async Class
202
+
203
+ For async operations, you can use the `AsyncFirecrawl` class. Its methods mirror the `Firecrawl` class, but you `await` them.
204
+
205
+ ```python
206
+ from firecrawl import AsyncFirecrawl
207
+
208
+ firecrawl = AsyncFirecrawl(api_key="YOUR_API_KEY")
209
+
210
+ # Async Scrape (v2)
211
+ async def example_scrape():
212
+ scrape_result = await firecrawl.scrape(url="https://example.com")
213
+ print(scrape_result)
214
+
215
+ # Async Crawl (v2)
216
+ async def example_crawl():
217
+ crawl_result = await firecrawl.crawl(url="https://example.com")
218
+ print(crawl_result)
219
+ ```
220
+
221
+ ## v1 compatibility
222
+
223
+ For legacy code paths, v1 remains available under `firecrawl.v1` with the original method names.
224
+
225
+ ```python
226
+ from firecrawl import Firecrawl
227
+
228
+ firecrawl = Firecrawl(api_key="YOUR_API_KEY")
229
+
230
+ # v1 methods (feature‑frozen)
231
+ doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html'])
232
+ crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100)
233
+ map_v1 = firecrawl.v1.map_url('https://firecrawl.dev')
234
+ ```
@@ -0,0 +1,92 @@
1
+ firecrawl/__init__.py,sha256=q1bLmuUqk74q-9DlgP_46FYnQJEOpUqInNB06fvj8W8,2193
2
+ firecrawl/client.py,sha256=u0bT1vkR0TOzW-2ZnJ88Tj6llbruOFvLjM9EPF7Cm4Q,12756
3
+ firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
4
+ firecrawl/types.py,sha256=RmLTq14Z-Nf883wgZxubrtn2HDu9mecsCEdcIdBCu14,2923
5
+ firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
6
+ firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
7
+ firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=tbuJ9y10ec9TtOnq97zmaEpOgZr9VzplRtZ_b6jkhq4,3302
8
+ firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=HTMx4cFzLjzWyBVgnzfIlNJ0CRCBx5q2u5LSlnFqtg8,10014
9
+ firecrawl/__tests__/e2e/v2/test_extract.py,sha256=b3WL4xPtINrPAn7oKKyYWyPIMSl0fr_DGVUU5NjJe-Y,1707
10
+ firecrawl/__tests__/e2e/v2/test_map.py,sha256=K7abzGcmQp4FLchZytQv4Kwkm9AAivPYyAC5kCb8ecE,1655
11
+ firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=4ElTgZqPmoQCC5tfjDnbw7W75oWa7PJ9WPXWaHVMRMs,7235
12
+ firecrawl/__tests__/e2e/v2/test_search.py,sha256=xlWuBqcwfWGkLpQidcsG3kdbyqHFjLQTMsJzCE_CFyY,9112
13
+ firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
14
+ firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
15
+ firecrawl/__tests__/e2e/v2/aio/conftest.py,sha256=hgLGGrNDSdpVx9KvxMw-a4HwNW4WVHWkwIncHJDxG04,1812
16
+ firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=SGmQ8nAcbzs86TJCBzYJrF1XJ25Q96mLb0JnEkBSuto,2429
17
+ firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=oytaAS7GCMlVv-EMLoSVkK3PiRt19FmBdCv8W4kstXc,7315
18
+ firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=RGIBtnvAKebwpPy1ZWWT-AqFvdf4bn_Td6y55F5l5As,1232
19
+ firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=uaSIk68BeeA9Z-2NL-HCYLcRMAEWyKX7oplpfbKUf20,1232
20
+ firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=xjCXkmoyp4AVa2LiAJKlMYOzDnEAnenkWMK2jhHkD7U,4486
21
+ firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=_IkHkIuvWY6vH99EsqrCZuKcfAX8qkG4NVG4KJNYu-0,8279
22
+ firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=lVGfwR79eaZamUZXgKStUJcpclCnnlpwHGo2pMOUhCY,1255
23
+ firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
24
+ firecrawl/__tests__/unit/test_recursive_schema_v1.py,sha256=raMbKRnJQT3BBue67twwDBt64hwfgSAmX4xMF6XtIFk,41706
25
+ firecrawl/__tests__/unit/v2/methods/test_agent.py,sha256=gl6ZSXwhFbOyyigL2rAKxNz7LNKvxV7n9BiMdQUAAno,11863
26
+ firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py,sha256=38_KZE6QJyV3mzXIJv7ecQ_Ugp0p6FFer6TeZxVh6OQ,7699
27
+ firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=xAx-aH4bD6uCWavg1cw_8-9FnLIFJNkvVPyOCVJ7r2E,4052
28
+ firecrawl/__tests__/unit/v2/methods/test_branding.py,sha256=f5DkvCMSQKcLEvzByOO0Ae3FuCSj8YzJjDvey6svJJM,8281
29
+ firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
30
+ firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
31
+ firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
32
+ firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=w4FZrDqk9XGOuCHw3SV5CkbRuyb_F4Kc8C5eJ7zVcFs,1959
33
+ firecrawl/__tests__/unit/v2/methods/test_pagination.py,sha256=-L4MLt6P_UVaQZQP9GVajxktABzqJHz7CojxuZnGjwI,24967
34
+ firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=mxx4B7v4cC42ivLUCosFB2cBIaBI7m9uOUsbE8pyyGU,4077
35
+ firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=tIImolryFlltn-zKoGFYodhc8eCInvxcmSu5n9ao1F0,6164
36
+ firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=Ilj-57ibqKpm84CZs8eBP3SKjWONvE5yWDr-mcuwzWw,9499
37
+ firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
38
+ firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
39
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
40
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=fOSPJcCVsjk2WSDViwTqTnAPsUvsb6yT9lVG_q7iQfk,3208
41
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
42
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=DFjgi4wdcesuD7SQzRSzqjbpN2YSSMkMY7oJ-q_wyrA,809
43
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
44
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=UAubD9xPX7H5oI6gttTxR3opvc3D-5ZDVCBHdpNFtYU,2182
45
+ firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py,sha256=E26UnUhpbjG-EG0ab4WRD94AxA5IBWmIHq8ZLBOWoAA,1202
46
+ firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py,sha256=pUwuWhRbVUTbgsZn4hgZesMkTMesTv_NPmvFW--ls-Y,3815
47
+ firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py,sha256=2XqJ0UAWun9s-fnj8CvAvLMNIdafcsX4uub8wfqh5Eg,3468
48
+ firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py,sha256=E3sbMRhHHWND3BbNW6i3onRbRZRNHK_cEd7KCTKWAkw,702
49
+ firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py,sha256=u27COoDIJySQJ9NOAuxy6U4lpuyUhIGoaEQMfUwD73I,37930
50
+ firecrawl/__tests__/unit/v2/utils/test_validation.py,sha256=wiCgvIrXAilEZ94clCYbbiygtj5OUyctWcaYB6pyzm0,11535
51
+ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4-4rw1-xVKWmLg2BOBGxjQPnUk,9517
52
+ firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
53
+ firecrawl/v1/client.py,sha256=lkbgeZolvqf1aAv58Vb38cPuuj2nMmbkESzPXS_lhCw,221663
54
+ firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
55
+ firecrawl/v2/client.py,sha256=4j65L6yvf6lOb0GkD9QDnqx_x3WwhRR6Apn0a_n9pg0,36890
56
+ firecrawl/v2/client_async.py,sha256=qFoh4w-r08tOc6GLFl5DzRxdbvibnZm0SBqIcsJCjiU,15819
57
+ firecrawl/v2/types.py,sha256=1A8W_81Lv5gXDBrzLurb6qKJXFw3wO-L8i-CF_sbViA,31059
58
+ firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
59
+ firecrawl/v2/watcher_async.py,sha256=dMMACMgeKrne_xSYeRvPu0m8nXqdNkDEsaiNBiD5ilw,10370
60
+ firecrawl/v2/methods/agent.py,sha256=Qd0-Qj7g4W63WSZ-njFsPepppgbHJC6MVe4UhT7mhDs,4425
61
+ firecrawl/v2/methods/batch.py,sha256=k7IoCUGpIirgk0Z-EqJJVSBRU09HfzRta9IxxLimbz4,14756
62
+ firecrawl/v2/methods/crawl.py,sha256=XyR88MPfF11HUOhZR3JJTQIv477ZyJ2uty286H-p5K4,20049
63
+ firecrawl/v2/methods/extract.py,sha256=ml0IEc7ckeTtapOYzgETjJjmHSc3JcJT5FK9_DL4S5Y,5538
64
+ firecrawl/v2/methods/map.py,sha256=5LdPTmGu2u1dvgojIdYaUgcWNQ6otVdiFWOsqdR0iHc,2981
65
+ firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
66
+ firecrawl/v2/methods/search.py,sha256=VD619vGZECZ8ESFAbFNWvSUCaEV3WK96VIxVyQb6_z4,8537
67
+ firecrawl/v2/methods/usage.py,sha256=NqkmFd-ziw8ijbZxwaxjxZHl85u0LTe_TYqr_NGWFwE,3693
68
+ firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
69
+ firecrawl/v2/methods/aio/agent.py,sha256=rcGlFkaDgfzglPl_aCQ7k0kNHqV8A6dBW3E4b3DauLc,4323
70
+ firecrawl/v2/methods/aio/batch.py,sha256=_jhoM4w9zVCQ9qb3sIKp9BTvkV_8IT_PnbhJcaiVcso,6975
71
+ firecrawl/v2/methods/aio/crawl.py,sha256=X5P4X_kEI2-Fcm4p46I-qdUW-RomlreAQNBxztX2pfo,12244
72
+ firecrawl/v2/methods/aio/extract.py,sha256=oc7LcjJ3g3nGYJeedEn2YWOg8X0NqgQpd0DrlI0SyiU,4516
73
+ firecrawl/v2/methods/aio/map.py,sha256=ZFGthx3BuTZ1elewssceHtmyWdlAMK2B97vSnHF-ELA,2683
74
+ firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
75
+ firecrawl/v2/methods/aio/search.py,sha256=d1SFbK1HtBKvR8qCvJQlJQCkN-3pbire80Fbbn7CnSw,6431
76
+ firecrawl/v2/methods/aio/usage.py,sha256=iUzTkdAWRheq-V5rRXcW0bc3MSODaVS1AqroRF0fO9M,3964
77
+ firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
78
+ firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
79
+ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
80
+ firecrawl/v2/utils/http_client.py,sha256=0hII3mnF_1Vd1nElu-hC9PipTUABGamUKb27q92_m5E,6068
81
+ firecrawl/v2/utils/http_client_async.py,sha256=Mt6Dw_i2R_W81ONXnl9N_AlPiggfylOPfbD5Rpgi7tA,1991
82
+ firecrawl/v2/utils/normalize.py,sha256=vjZFT0q-Nl5nGyRd21cWbJhzvnu0FtIysNI7tbxp1zk,4173
83
+ firecrawl/v2/utils/validation.py,sha256=LQo7iNuC-stjDBfUml8h6NoWpnxdgJ2kfzD-63iJbuM,27330
84
+ firecrawl-4.12.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
85
+ tests/test_agent_integration.py,sha256=8Uo0K4pOjFQV6F_2Wu1_2sYYlCdci9gbvUHpiAb9TxY,10217
86
+ tests/test_api_key_handling.py,sha256=iNaHp6zc9bIwpN3DdiWB2Rzk0j7HCP7VgpRE_1byNYc,1303
87
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
88
+ tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
89
+ firecrawl-4.12.0.dist-info/METADATA,sha256=7qxysEMUMo2zFMRSk-_F1dZ1XmY1rDFlKbHn9q0oAGs,7393
90
+ firecrawl-4.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
+ firecrawl-4.12.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
92
+ firecrawl-4.12.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Sideguide Technologies Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ firecrawl
2
+ tests
@@ -0,0 +1,277 @@
1
+ """
2
+ Integration tests for agent method with mocked requests.
3
+ """
4
+
5
+ import unittest
6
+ from unittest.mock import patch, MagicMock
7
+ from pydantic import BaseModel, Field
8
+ from typing import List, Optional
9
+
10
+ from firecrawl import FirecrawlApp
11
+
12
+
13
class Founder(BaseModel):
    """A single founder record as the agent endpoint is expected to return it.

    Used as the nested item type of ``FoundersSchema`` in the schema
    normalization tests below.
    """

    # Required: every extracted founder must at least carry a name.
    name: str = Field(description="Full name of the founder")
    # Optional descriptive fields; they default to None when not extracted.
    role: Optional[str] = Field(default=None, description="Role or position")
    background: Optional[str] = Field(default=None, description="Professional background")
17
+
18
+
19
class FoundersSchema(BaseModel):
    """Top-level extraction schema: a list of ``Founder`` entries.

    Passed as ``schema=`` to ``app.agent(...)`` to exercise the SDK's
    Pydantic-model-to-JSON-schema normalization.
    """

    founders: List[Founder] = Field(description="List of founders")
21
+
22
+
23
class TestAgent(unittest.TestCase):
    """Integration tests for agent method.

    All HTTP traffic is intercepted by patching ``requests.post`` /
    ``requests.get`` inside ``firecrawl.v2.utils.http_client``, so these
    tests exercise the SDK's request construction and response handling
    without touching the network.

    NOTE(review): with stacked ``@patch`` decorators the *bottom-most*
    decorator maps to the *first* mock parameter — hence ``(self,
    mock_get, mock_post)`` when ``post`` is patched above ``get``.
    """

    @patch('firecrawl.v2.utils.http_client.requests.post')
    @patch('firecrawl.v2.utils.http_client.requests.get')
    def test_agent_basic(self, mock_get, mock_post):
        """Test basic agent call."""
        # Mock start agent response: the POST returns a job id with
        # status "processing", which should make the SDK poll via GET.
        mock_start_response = MagicMock()
        mock_start_response.ok = True
        mock_start_response.status_code = 200
        mock_start_response.json.return_value = {
            "success": True,
            "id": "test-agent-123",
            "status": "processing"
        }
        mock_post.return_value = mock_start_response

        # Mock get status response (completed)
        mock_status_response = MagicMock()
        mock_status_response.ok = True
        mock_status_response.status_code = 200
        mock_status_response.json.return_value = {
            "success": True,
            "id": "test-agent-123",
            "status": "completed",
            "data": {
                "founders": [
                    {"name": "John Doe", "role": "CEO", "background": "Tech entrepreneur"},
                    {"name": "Jane Smith", "role": "CTO", "background": "Software engineer"}
                ]
            },
            "creditsUsed": 10,
            "expiresAt": "2024-01-01T00:00:00Z"
        }
        mock_get.return_value = mock_status_response

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(
            prompt="Find the founders of Firecrawl",
            schema=FoundersSchema
        )

        # Verify post was called with correct URL and data.
        # The URL may be passed positionally or as the ``url=`` keyword,
        # so check both forms of call_args.
        mock_post.assert_called_once()
        post_call_args = mock_post.call_args
        post_url = post_call_args[1]["url"] if "url" in post_call_args[1] else post_call_args[0][0]
        assert "/v2/agent" in str(post_url)

        # Check request body: the Pydantic model must have been
        # normalized into a plain JSON-schema dict under "schema".
        request_body = post_call_args[1]["json"]
        assert request_body["prompt"] == "Find the founders of Firecrawl"
        assert "schema" in request_body
        assert request_body["schema"]["type"] == "object"
        assert "founders" in request_body["schema"]["properties"]

        # Verify get was called to check status
        mock_get.assert_called()

        # Check result
        assert result.status == "completed"
        assert result.data is not None

    @patch('firecrawl.v2.utils.http_client.requests.post')
    def test_agent_with_urls(self, mock_post):
        """Test agent call with URLs."""
        # Response already reports "completed" — presumably the SDK then
        # skips polling, which is why requests.get is not patched here.
        mock_response = MagicMock()
        mock_response.ok = True
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_post.return_value = mock_response

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(
            urls=["https://example.com", "https://test.com"],
            prompt="Extract information",
            schema={"type": "object", "properties": {"info": {"type": "string"}}}
        )

        # Check request body includes URLs (result itself is not asserted;
        # this test only inspects the outgoing request).
        post_call_args = mock_post.call_args
        request_body = post_call_args[1]["json"]
        assert request_body["urls"] == ["https://example.com", "https://test.com"]
        assert request_body["prompt"] == "Extract information"

    @patch('firecrawl.v2.utils.http_client.requests.post')
    def test_agent_with_dict_schema(self, mock_post):
        """Test agent call with dict schema."""
        mock_response = MagicMock()
        mock_response.ok = True
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_post.return_value = mock_response

        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"}
            }
        }

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(
            prompt="Extract person data",
            schema=schema
        )

        # Check request body includes schema: a plain dict schema should
        # pass through unchanged (no normalization applied).
        post_call_args = mock_post.call_args
        request_body = post_call_args[1]["json"]
        assert request_body["schema"] == schema

    @patch('firecrawl.v2.utils.http_client.requests.post')
    def test_agent_with_all_params(self, mock_post):
        """Test agent call with all parameters."""
        mock_response = MagicMock()
        mock_response.ok = True
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_post.return_value = mock_response

        schema = {"type": "object"}
        urls = ["https://example.com"]

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(
            urls=urls,
            prompt="Complete test",
            schema=schema,
            integration="test-integration",
            max_credits=50,
            strict_constrain_to_urls=True,
            poll_interval=1,
            timeout=30
        )

        # Check all parameters are in request body. Note the snake_case
        # kwargs must be serialized as camelCase keys on the wire
        # (max_credits -> maxCredits, strict_constrain_to_urls ->
        # strictConstrainToURLs); poll_interval/timeout are client-side
        # options and are not asserted in the body.
        post_call_args = mock_post.call_args
        request_body = post_call_args[1]["json"]
        assert request_body["prompt"] == "Complete test"
        assert request_body["urls"] == urls
        assert request_body["schema"] == schema
        assert request_body["integration"] == "test-integration"
        assert request_body["maxCredits"] == 50
        assert request_body["strictConstrainToURLs"] is True

    @patch('firecrawl.v2.utils.http_client.requests.post')
    def test_agent_pydantic_schema_normalization(self, mock_post):
        """Test that Pydantic schemas are properly normalized."""
        mock_response = MagicMock()
        mock_response.ok = True
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_post.return_value = mock_response

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(
            prompt="Find founders",
            schema=FoundersSchema
        )

        # Check that schema was normalized to JSON schema format: the
        # FoundersSchema model class must arrive as a JSON-serializable
        # dict whose "founders" property is an array.
        post_call_args = mock_post.call_args
        request_body = post_call_args[1]["json"]
        assert "schema" in request_body
        schema = request_body["schema"]
        assert schema["type"] == "object"
        assert "properties" in schema
        assert "founders" in schema["properties"]
        assert schema["properties"]["founders"]["type"] == "array"

    @patch('firecrawl.v2.utils.http_client.requests.post')
    @patch('firecrawl.v2.utils.http_client.requests.get')
    def test_agent_url_construction(self, mock_get, mock_post):
        """Test that agent requests are sent to correct URL."""
        # Mock start agent response
        mock_start_response = MagicMock()
        mock_start_response.ok = True
        mock_start_response.status_code = 200
        mock_start_response.json.return_value = {
            "success": True,
            "id": "test-agent-123",
            "status": "processing"
        }
        mock_post.return_value = mock_start_response

        # Mock get status response
        mock_status_response = MagicMock()
        mock_status_response.ok = True
        mock_status_response.status_code = 200
        mock_status_response.json.return_value = {
            "success": True,
            "id": "test-agent-123",
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_get.return_value = mock_status_response

        # Explicit api_url so the asserted paths are rooted predictably.
        app = FirecrawlApp(api_key="test-api-key", api_url="https://api.firecrawl.dev")
        result = app.agent(prompt="Test prompt")

        # Check POST URL - requests.post is called with url as keyword arg
        post_call_args = mock_post.call_args
        post_url = post_call_args[1].get("url") if "url" in post_call_args[1] else post_call_args[0][0]
        assert "/v2/agent" in str(post_url)

        # Check GET URL: status polls must target /v2/agent/<job id>.
        get_call_args = mock_get.call_args
        get_url = get_call_args[1].get("url") if "url" in get_call_args[1] else get_call_args[0][0]
        assert "/v2/agent/test-agent-123" in str(get_url)

    @patch('firecrawl.v2.utils.http_client.requests.post')
    def test_agent_headers(self, mock_post):
        """Test that agent requests include correct headers."""
        mock_response = MagicMock()
        mock_response.ok = True
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "success": True,
            "status": "completed",
            "data": {"result": "done"}
        }
        mock_post.return_value = mock_response

        app = FirecrawlApp(api_key="test-api-key")
        result = app.agent(prompt="Test prompt")

        # Check headers: the api_key must be forwarded as a Bearer token
        # and the body declared as JSON.
        post_call_args = mock_post.call_args
        headers = post_call_args[1]["headers"]
        assert "Authorization" in headers
        assert headers["Authorization"] == "Bearer test-api-key"
        assert headers["Content-Type"] == "application/json"
273
+
274
+
275
# Allow running this module directly (python tests/test_agent_integration.py)
# in addition to discovery via pytest/unittest.
if __name__ == '__main__':
    unittest.main()
277
+