firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
firecrawl/client.py ADDED
@@ -0,0 +1,281 @@
1
+ """
2
+ Firecrawl Client
3
+
4
+ A Firecrawl client that enables you to scrape content from websites, crawl entire sites, search the web, and extract structured data using AI.
5
+
6
+ The client supports both v1 and v2 API versions, providing access to features like:
7
+ - Web scraping with advanced options (screenshots, markdown conversion, etc.)
8
+ - Site crawling with configurable depth and limits
9
+ - Web search with content extraction
10
+ - Structured data extraction using AI models
11
+ - Deep research capabilities
12
+
13
+ Usage:
14
+ from firecrawl import Firecrawl
15
+ firecrawl = Firecrawl(api_key="your-api-key")
16
+ result = firecrawl.scrape("https://example.com")
17
+
18
+ Check example.py for other usage examples.
19
+ """
20
+
21
+ from typing import Any, Dict, Optional, List, Union
22
+ import logging
23
+
24
+
25
+ from .v1 import V1FirecrawlApp, AsyncV1FirecrawlApp
26
+ from .v2 import FirecrawlClient as V2FirecrawlClient
27
+ from .v2.client_async import AsyncFirecrawlClient
28
+ from .v2.types import Document
29
+
30
+ logger = logging.getLogger("firecrawl")
31
+
32
class V1Proxy:
    """Type-annotated proxy for v1 client methods.

    Binds the commonly used, feature-frozen v1 surface directly as
    attributes (for IDE discoverability) and forwards any other attribute
    access to the underlying v1 client, mirroring ``V2Proxy``.
    """

    # String annotation keeps the class importable/lazily evaluated.
    _client: Optional["V1FirecrawlApp"]

    def __init__(self, client_instance: Optional["V1FirecrawlApp"]):
        """Wrap *client_instance*; ``None`` means v1 is unavailable.

        Args:
            client_instance: Initialized v1 client, or ``None``.
        """
        self._client = client_instance

        if client_instance:
            # Bind the stable v1 surface directly.
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

    def __getattr__(self, name):
        """Forward unknown attribute access to the underlying v1 client.

        Raises:
            AttributeError: If no v1 client is available, or the client
                itself lacks the attribute.
        """
        # __dict__ lookup avoids __getattr__ recursion if __init__ never ran.
        client = self.__dict__.get("_client")
        if client is not None:
            return getattr(client, name)
        raise AttributeError(f"v1 client is not available: {name}")
51
class V2Proxy:
    """Proxy class that forwards method calls to the v2 client.

    Binds the full v2 surface directly as attributes when a client is
    available; anything else is forwarded via ``__getattr__``.
    """

    # String annotation keeps the class importable/lazily evaluated.
    _client: Optional["V2FirecrawlClient"]

    def __init__(self, client_instance: Optional["V2FirecrawlClient"]):
        """Wrap *client_instance*; ``None`` means v2 is unavailable.

        Args:
            client_instance: Initialized v2 client, or ``None``.
        """
        self._client = client_instance

        if client_instance:
            self.scrape = client_instance.scrape
            self.search = client_instance.search
            self.crawl = client_instance.crawl
            self.start_crawl = client_instance.start_crawl
            self.get_crawl_status = client_instance.get_crawl_status
            self.cancel_crawl = client_instance.cancel_crawl
            self.get_crawl_errors = client_instance.get_crawl_errors
            self.get_active_crawls = client_instance.get_active_crawls
            self.active_crawls = client_instance.active_crawls
            self.crawl_params_preview = client_instance.crawl_params_preview

            self.extract = client_instance.extract
            self.start_extract = client_instance.start_extract
            self.get_extract_status = client_instance.get_extract_status

            self.agent = client_instance.agent
            self.start_agent = client_instance.start_agent
            self.get_agent_status = client_instance.get_agent_status
            self.cancel_agent = client_instance.cancel_agent

            self.start_batch_scrape = client_instance.start_batch_scrape
            self.get_batch_scrape_status = client_instance.get_batch_scrape_status
            self.cancel_batch_scrape = client_instance.cancel_batch_scrape
            self.batch_scrape = client_instance.batch_scrape
            self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors

            self.map = client_instance.map
            self.get_concurrency = client_instance.get_concurrency
            self.get_credit_usage = client_instance.get_credit_usage
            self.get_token_usage = client_instance.get_token_usage
            self.get_queue_status = client_instance.get_queue_status

            self.watcher = client_instance.watcher

    def __getattr__(self, name):
        """Forward attribute access to the underlying client.

        Raises:
            AttributeError: With a clear message when no v2 client is
                available (previously raised a confusing ``NoneType`` error).
        """
        # __dict__ lookup avoids __getattr__ recursion if __init__ never ran.
        client = self.__dict__.get("_client")
        if client is not None:
            return getattr(client, name)
        raise AttributeError(f"v2 client is not available: {name}")
97
class AsyncV1Proxy:
    """Type-annotated proxy for async v1 client methods.

    Binds the commonly used, feature-frozen v1 surface directly as
    attributes and forwards any other attribute access to the underlying
    async v1 client, mirroring ``AsyncV2Proxy``.
    """

    # String annotation keeps the class importable/lazily evaluated.
    _client: Optional["AsyncV1FirecrawlApp"]

    def __init__(self, client_instance: Optional["AsyncV1FirecrawlApp"]):
        """Wrap *client_instance*; ``None`` means v1 is unavailable.

        Args:
            client_instance: Initialized async v1 client, or ``None``.
        """
        self._client = client_instance

        if client_instance:
            # Bind the stable v1 surface directly.
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

    def __getattr__(self, name):
        """Forward unknown attribute access to the underlying v1 client.

        Raises:
            AttributeError: If no v1 client is available, or the client
                itself lacks the attribute.
        """
        # __dict__ lookup avoids __getattr__ recursion if __init__ never ran.
        client = self.__dict__.get("_client")
        if client is not None:
            return getattr(client, name)
        raise AttributeError(f"v1 client is not available: {name}")
116
class AsyncV2Proxy:
    """Proxy class that forwards method calls to the appropriate version client."""
    _client: Optional[AsyncFirecrawlClient] = None

    # The full async v2 surface mirrored onto this proxy when a client exists.
    _FORWARDED_NAMES = (
        "scrape", "search", "crawl", "start_crawl", "wait_crawl",
        "get_crawl_status", "cancel_crawl", "get_crawl_errors",
        "get_active_crawls", "active_crawls", "crawl_params_preview",
        "extract", "start_extract", "get_extract_status",
        "agent", "start_agent", "get_agent_status", "cancel_agent",
        "start_batch_scrape", "get_batch_scrape_status", "cancel_batch_scrape",
        "wait_batch_scrape", "batch_scrape", "get_batch_scrape_errors",
        "map", "get_concurrency", "get_credit_usage", "get_token_usage",
        "get_queue_status", "watcher",
    )

    def __init__(self, client_instance: Optional[AsyncFirecrawlClient] = None):
        """Bind the async v2 surface from *client_instance* onto this proxy."""
        self._client = client_instance

        if client_instance:
            # Bind each method directly so attribute access is a plain lookup.
            for attr in self._FORWARDED_NAMES:
                setattr(self, attr, getattr(client_instance, attr))

    def __getattr__(self, name):
        """Forward attribute access to the underlying client."""
        if self._client:
            return getattr(self._client, name)
        raise AttributeError(f"Async v2 client not implemented yet: {name}")
166
+
167
class Firecrawl:
    """
    Unified Firecrawl client (v2 by default, v1 under ``.v1``).

    Provides a single entrypoint that exposes the latest API directly while
    keeping a feature-frozen v1 available for incremental migration.
    """

    def __init__(self, api_key: Optional[str] = None, api_url: str = "https://api.firecrawl.dev"):
        """Initialize the unified client.

        Args:
            api_key: Firecrawl API key (or set ``FIRECRAWL_API_KEY``)
            api_url: Base API URL (defaults to production)
        """
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients (guards keep this resilient if
        # either class fails to import in a trimmed distribution).
        self._v1_client = V1FirecrawlApp(api_key=api_key, api_url=api_url) if V1FirecrawlApp else None
        self._v2_client = V2FirecrawlClient(api_key=api_key, api_url=api_url) if V2FirecrawlClient else None

        # Create version-specific proxies
        self.v1 = V1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = V2Proxy(self._v2_client)

        # Expose the v2 surface directly on the top-level client.
        self.scrape = self._v2_client.scrape
        self.search = self._v2_client.search
        self.map = self._v2_client.map

        # Crawl lifecycle
        self.crawl = self._v2_client.crawl
        self.start_crawl = self._v2_client.start_crawl
        self.crawl_params_preview = self._v2_client.crawl_params_preview
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        self.get_active_crawls = self._v2_client.get_active_crawls
        self.active_crawls = self._v2_client.active_crawls

        # Batch scraping
        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors

        # Extraction
        self.start_extract = self._v2_client.start_extract
        self.get_extract_status = self._v2_client.get_extract_status
        self.extract = self._v2_client.extract

        # Agent
        self.start_agent = self._v2_client.start_agent
        self.get_agent_status = self._v2_client.get_agent_status
        self.cancel_agent = self._v2_client.cancel_agent
        self.agent = self._v2_client.agent

        # Account/usage
        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage
        self.get_queue_status = self._v2_client.get_queue_status

        self.watcher = self._v2_client.watcher
228
class AsyncFirecrawl:
    """Async unified Firecrawl client (v2 by default, v1 under ``.v1``)."""

    def __init__(self, api_key: Optional[str] = None, api_url: str = "https://api.firecrawl.dev"):
        """Initialize the async unified client.

        Args:
            api_key: Firecrawl API key (or set ``FIRECRAWL_API_KEY``)
            api_url: Base API URL (defaults to production)
        """
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients (guards keep this resilient if
        # either class fails to import in a trimmed distribution).
        self._v1_client = AsyncV1FirecrawlApp(api_key=api_key, api_url=api_url) if AsyncV1FirecrawlApp else None
        self._v2_client = AsyncFirecrawlClient(api_key=api_key, api_url=api_url) if AsyncFirecrawlClient else None

        # Create version-specific proxies
        self.v1 = AsyncV1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = AsyncV2Proxy(self._v2_client)

        # Expose v2 async surface directly on the top-level client for ergonomic access
        # Keep method names aligned with the sync client
        self.scrape = self._v2_client.scrape
        self.search = self._v2_client.search
        self.map = self._v2_client.map

        # Crawl lifecycle
        self.start_crawl = self._v2_client.start_crawl
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.crawl = self._v2_client.crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        # Bound for parity with the sync Firecrawl client, which exposes both.
        self.get_active_crawls = self._v2_client.get_active_crawls
        self.active_crawls = self._v2_client.active_crawls
        self.crawl_params_preview = self._v2_client.crawl_params_preview

        # Batch scraping
        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors

        # Extraction
        self.start_extract = self._v2_client.start_extract
        self.get_extract_status = self._v2_client.get_extract_status
        self.extract = self._v2_client.extract

        # Agent
        self.start_agent = self._v2_client.start_agent
        self.get_agent_status = self._v2_client.get_agent_status
        self.cancel_agent = self._v2_client.cancel_agent
        self.agent = self._v2_client.agent

        # Account/usage
        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage
        self.get_queue_status = self._v2_client.get_queue_status

        self.watcher = self._v2_client.watcher
279
# Backwards-compatible aliases: ``FirecrawlApp``/``AsyncFirecrawlApp`` are the
# historical class names, kept so existing imports continue to work.
FirecrawlApp = Firecrawl
AsyncFirecrawlApp = AsyncFirecrawl