firecrawl-4.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0
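
The layout above splits the package into a feature-frozen v1 client (firecrawl/v1/client.py), a v2 sync client (firecrawl/v2/client.py) with async counterparts under firecrawl/v2/client_async.py and firecrawl/v2/methods/aio/, and a thin unified wrapper in firecrawl/client.py, which is shown in full below. A minimal import sketch, using only names that the wrapper's own docstring and relative imports confirm:

# Import map implied by the file layout; only names referenced inside
# firecrawl/client.py (shown below) are confirmed by this diff.
from firecrawl import Firecrawl                               # unified client, v2 surface by default
from firecrawl.v1 import V1FirecrawlApp, AsyncV1FirecrawlApp  # feature-frozen v1 clients
from firecrawl.v2 import FirecrawlClient                      # v2 sync client
from firecrawl.v2.client_async import AsyncFirecrawlClient    # v2 async client
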
firecrawl/client.py
ADDED
@@ -0,0 +1,281 @@
"""
Firecrawl Client

A Firecrawl client that enables you to scrape content from websites, crawl entire sites, search the web, and extract structured data using AI.

The client supports both v1 and v2 API versions, providing access to features like:
- Web scraping with advanced options (screenshots, markdown conversion, etc.)
- Site crawling with configurable depth and limits
- Web search with content extraction
- Structured data extraction using AI models
- Deep research capabilities

Usage:
    from firecrawl import Firecrawl
    firecrawl = Firecrawl(api_key="your-api-key")
    result = firecrawl.scrape("https://example.com")

Check example.py for other usage examples.
"""

from typing import Any, Dict, Optional, List, Union
import logging


from .v1 import V1FirecrawlApp, AsyncV1FirecrawlApp
from .v2 import FirecrawlClient as V2FirecrawlClient
from .v2.client_async import AsyncFirecrawlClient
from .v2.types import Document

logger = logging.getLogger("firecrawl")

class V1Proxy:
    """Type-annotated proxy for v1 client methods."""
    _client: Optional[V1FirecrawlApp]

    def __init__(self, client_instance: Optional[V1FirecrawlApp]):
        self._client = client_instance

        if client_instance:
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

class V2Proxy:
    """Proxy class that forwards method calls to the appropriate version client."""
    _client: Optional[V2FirecrawlClient]

    def __init__(self, client_instance: Optional[V2FirecrawlClient]):
        self._client = client_instance

        if client_instance:
            self.scrape = client_instance.scrape
            self.search = client_instance.search
            self.crawl = client_instance.crawl
            self.start_crawl = client_instance.start_crawl
            self.get_crawl_status = client_instance.get_crawl_status
            self.cancel_crawl = client_instance.cancel_crawl
            self.get_crawl_errors = client_instance.get_crawl_errors
            self.get_active_crawls = client_instance.get_active_crawls
            self.active_crawls = client_instance.active_crawls
            self.crawl_params_preview = client_instance.crawl_params_preview

            self.extract = client_instance.extract
            self.start_extract = client_instance.start_extract
            self.get_extract_status = client_instance.get_extract_status

            self.agent = client_instance.agent
            self.start_agent = client_instance.start_agent
            self.get_agent_status = client_instance.get_agent_status
            self.cancel_agent = client_instance.cancel_agent

            self.start_batch_scrape = client_instance.start_batch_scrape
            self.get_batch_scrape_status = client_instance.get_batch_scrape_status
            self.cancel_batch_scrape = client_instance.cancel_batch_scrape
            self.batch_scrape = client_instance.batch_scrape
            self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors

            self.map = client_instance.map
            self.get_concurrency = client_instance.get_concurrency
            self.get_credit_usage = client_instance.get_credit_usage
            self.get_token_usage = client_instance.get_token_usage
            self.get_queue_status = client_instance.get_queue_status

            self.watcher = client_instance.watcher

    def __getattr__(self, name):
        """Forward attribute access to the underlying client."""
        return getattr(self._client, name)

class AsyncV1Proxy:
    """Type-annotated proxy for v1 client methods."""
    _client: Optional[AsyncV1FirecrawlApp]

    def __init__(self, client_instance: Optional[AsyncV1FirecrawlApp]):
        self._client = client_instance

        if client_instance:
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

class AsyncV2Proxy:
    """Proxy class that forwards method calls to the appropriate version client."""
    _client: Optional[AsyncFirecrawlClient] = None

    def __init__(self, client_instance: Optional[AsyncFirecrawlClient] = None):
        self._client = client_instance

        if client_instance:
            self.scrape = client_instance.scrape
            self.search = client_instance.search
            self.crawl = client_instance.crawl
            self.start_crawl = client_instance.start_crawl
            self.wait_crawl = client_instance.wait_crawl
            self.get_crawl_status = client_instance.get_crawl_status
            self.cancel_crawl = client_instance.cancel_crawl
            self.get_crawl_errors = client_instance.get_crawl_errors
            self.get_active_crawls = client_instance.get_active_crawls
            self.active_crawls = client_instance.active_crawls
            self.crawl_params_preview = client_instance.crawl_params_preview

            self.extract = client_instance.extract
            self.start_extract = client_instance.start_extract
            self.get_extract_status = client_instance.get_extract_status

            self.agent = client_instance.agent
            self.start_agent = client_instance.start_agent
            self.get_agent_status = client_instance.get_agent_status
            self.cancel_agent = client_instance.cancel_agent

            self.start_batch_scrape = client_instance.start_batch_scrape
            self.get_batch_scrape_status = client_instance.get_batch_scrape_status
            self.cancel_batch_scrape = client_instance.cancel_batch_scrape
            self.wait_batch_scrape = client_instance.wait_batch_scrape
            self.batch_scrape = client_instance.batch_scrape
            self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors

            self.map = client_instance.map
            self.get_concurrency = client_instance.get_concurrency
            self.get_credit_usage = client_instance.get_credit_usage
            self.get_token_usage = client_instance.get_token_usage
            self.get_queue_status = client_instance.get_queue_status

            self.watcher = client_instance.watcher

    def __getattr__(self, name):
        """Forward attribute access to the underlying client."""
        if self._client:
            return getattr(self._client, name)
        raise AttributeError(f"Async v2 client not implemented yet: {name}")


class Firecrawl:
    """
    Unified Firecrawl client (v2 by default, v1 under ``.v1``).

    Provides a single entrypoint that exposes the latest API directly while
    keeping a feature-frozen v1 available for incremental migration.
    """

    def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.dev"):
        """Initialize the unified client.

        Args:
            api_key: Firecrawl API key (or set ``FIRECRAWL_API_KEY``)
            api_url: Base API URL (defaults to production)
        """
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients
        self._v1_client = V1FirecrawlApp(api_key=api_key, api_url=api_url) if V1FirecrawlApp else None
        self._v2_client = V2FirecrawlClient(api_key=api_key, api_url=api_url) if V2FirecrawlClient else None

        # Create version-specific proxies
        self.v1 = V1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = V2Proxy(self._v2_client)

        self.scrape = self._v2_client.scrape
        self.search = self._v2_client.search
        self.map = self._v2_client.map

        self.crawl = self._v2_client.crawl
        self.start_crawl = self._v2_client.start_crawl
        self.crawl_params_preview = self._v2_client.crawl_params_preview
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        self.get_active_crawls = self._v2_client.get_active_crawls
        self.active_crawls = self._v2_client.active_crawls

        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors

        self.start_extract = self._v2_client.start_extract
        self.get_extract_status = self._v2_client.get_extract_status
        self.extract = self._v2_client.extract

        self.start_agent = self._v2_client.start_agent
        self.get_agent_status = self._v2_client.get_agent_status
        self.cancel_agent = self._v2_client.cancel_agent
        self.agent = self._v2_client.agent

        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage
        self.get_queue_status = self._v2_client.get_queue_status

        self.watcher = self._v2_client.watcher

class AsyncFirecrawl:
    """Async unified Firecrawl client (v2 by default, v1 under ``.v1``)."""

    def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.dev"):
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients
        self._v1_client = AsyncV1FirecrawlApp(api_key=api_key, api_url=api_url) if AsyncV1FirecrawlApp else None
        self._v2_client = AsyncFirecrawlClient(api_key=api_key, api_url=api_url) if AsyncFirecrawlClient else None

        # Create version-specific proxies
        self.v1 = AsyncV1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = AsyncV2Proxy(self._v2_client)

        # Expose v2 async surface directly on the top-level client for ergonomic access
        # Keep method names aligned with the sync client
        self.scrape = self._v2_client.scrape
        self.search = self._v2_client.search
        self.map = self._v2_client.map

        self.start_crawl = self._v2_client.start_crawl
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.crawl = self._v2_client.crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        self.active_crawls = self._v2_client.active_crawls
        self.crawl_params_preview = self._v2_client.crawl_params_preview

        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors

        self.start_extract = self._v2_client.start_extract
        self.get_extract_status = self._v2_client.get_extract_status
        self.extract = self._v2_client.extract

        self.start_agent = self._v2_client.start_agent
        self.get_agent_status = self._v2_client.get_agent_status
        self.cancel_agent = self._v2_client.cancel_agent
        self.agent = self._v2_client.agent

        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage
        self.get_queue_status = self._v2_client.get_queue_status

        self.watcher = self._v2_client.watcher

# Export Firecrawl as an alias for FirecrawlApp
FirecrawlApp = Firecrawl
AsyncFirecrawlApp = AsyncFirecrawl
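
Going by the module docstring and the attribute wiring above, the unified Firecrawl client exposes the v2 surface directly (scrape, crawl, search, map, extract, agent, batch scrape, usage) and keeps the feature-frozen v1 client reachable under .v1, while AsyncFirecrawl mirrors the same names as coroutines. The sketch below illustrates that split; it is not part of the package. It assumes an API key passed explicitly (or set via FIRECRAWL_API_KEY), assumes the async client's methods are awaitable as the v2/methods/aio layout suggests, and calls the v1 scrape_url with only a URL because its full signature is not shown in this diff.

# Usage sketch for the unified clients defined in firecrawl/client.py above.
# Method names come from this diff; the key and URLs are illustrative.
import asyncio

from firecrawl import Firecrawl, AsyncFirecrawl


def sync_example() -> None:
    client = Firecrawl(api_key="your-api-key")  # or rely on FIRECRAWL_API_KEY

    # v2 surface, exposed directly on the unified client
    doc = client.scrape("https://example.com")
    print(doc)

    # feature-frozen v1 client, kept reachable under .v1 for migration
    if client.v1 is not None:
        legacy = client.v1.scrape_url("https://example.com")
        print(legacy)


async def async_example() -> None:
    client = AsyncFirecrawl(api_key="your-api-key")

    # same method names as the sync client, awaited instead of called directly
    doc = await client.scrape("https://example.com")
    print(doc)


if __name__ == "__main__":
    sync_example()
    asyncio.run(async_example())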