firecrawl 2.16.5__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl might be problematic.
- firecrawl/__init__.py +27 -19
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
- firecrawl/__tests__/e2e/v2/test_map.py +60 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
- firecrawl/__tests__/e2e/v2/test_search.py +265 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +241 -0
- firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
- firecrawl/types.py +157 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +4653 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +802 -0
- firecrawl/v2/client_async.py +250 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/batch.py +85 -0
- firecrawl/v2/methods/aio/crawl.py +174 -0
- firecrawl/v2/methods/aio/extract.py +126 -0
- firecrawl/v2/methods/aio/map.py +59 -0
- firecrawl/v2/methods/aio/scrape.py +36 -0
- firecrawl/v2/methods/aio/search.py +58 -0
- firecrawl/v2/methods/aio/usage.py +42 -0
- firecrawl/v2/methods/batch.py +420 -0
- firecrawl/v2/methods/crawl.py +468 -0
- firecrawl/v2/methods/extract.py +131 -0
- firecrawl/v2/methods/map.py +77 -0
- firecrawl/v2/methods/scrape.py +68 -0
- firecrawl/v2/methods/search.py +173 -0
- firecrawl/v2/methods/usage.py +41 -0
- firecrawl/v2/types.py +546 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +153 -0
- firecrawl/v2/utils/http_client_async.py +64 -0
- firecrawl/v2/utils/validation.py +324 -0
- firecrawl/v2/watcher.py +312 -0
- firecrawl/v2/watcher_async.py +245 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
- firecrawl-3.0.3.dist-info/RECORD +78 -0
- tests/test_timeout_conversion.py +117 -0
- firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/e2e_withAuth/test.py +0 -170
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
- firecrawl-2.16.5.dist-info/RECORD +0 -12
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
- {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
firecrawl/client.py
ADDED
@@ -0,0 +1,241 @@

"""
Firecrawl Client

A Firecrawl client that enables you to scrape content from websites, crawl entire sites, search the web, and extract structured data using AI.

The client supports both v1 and v2 API versions, providing access to features like:
- Web scraping with advanced options (screenshots, markdown conversion, etc.)
- Site crawling with configurable depth and limits
- Web search with content extraction
- Structured data extraction using AI models
- Deep research capabilities

Usage:
    from firecrawl import Firecrawl
    firecrawl = Firecrawl(api_key="your-api-key")
    result = firecrawl.scrape("https://example.com")

Check example.py for other usage examples.
"""

from typing import Any, Dict, Optional, List, Union
import logging


from .v1 import V1FirecrawlApp, AsyncV1FirecrawlApp
from .v2 import FirecrawlClient as V2FirecrawlClient
from .v2.client_async import AsyncFirecrawlClient

logger = logging.getLogger("firecrawl")

class V1Proxy:
    """Type-annotated proxy for v1 client methods."""
    _client: Optional[V1FirecrawlApp]

    def __init__(self, client_instance: Optional[V1FirecrawlApp]):
        self._client = client_instance

        if client_instance:
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

class V2Proxy:
    """Proxy class that forwards method calls to the appropriate version client."""
    _client: Optional[V2FirecrawlClient]

    def __init__(self, client_instance: Optional[V2FirecrawlClient]):
        self._client = client_instance

        if client_instance:
            # self.scrape = client_instance.scrape
            self.search = client_instance.search
            self.crawl = client_instance.crawl
            self.get_crawl_status = client_instance.get_crawl_status
            self.cancel_crawl = client_instance.cancel_crawl
            self.start_crawl = client_instance.start_crawl
            self.crawl_params_preview = client_instance.crawl_params_preview
            self.extract = client_instance.extract
            self.start_batch_scrape = client_instance.start_batch_scrape
            self.get_batch_scrape_status = client_instance.get_batch_scrape_status
            self.cancel_batch_scrape = client_instance.cancel_batch_scrape
            self.batch_scrape = client_instance.batch_scrape
            self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors
            self.get_extract_status = client_instance.get_extract_status
            self.map = client_instance.map
            self.get_concurrency = client_instance.get_concurrency
            self.get_credit_usage = client_instance.get_credit_usage
            self.get_token_usage = client_instance.get_token_usage

    def __getattr__(self, name):
        """Forward attribute access to the underlying client."""
        return getattr(self._client, name)

class AsyncV1Proxy:
    """Type-annotated proxy for v1 client methods."""
    _client: Optional[AsyncV1FirecrawlApp]

    def __init__(self, client_instance: Optional[AsyncV1FirecrawlApp]):
        self._client = client_instance

        if client_instance:
            self.scrape_url = client_instance.scrape_url
            self.crawl_url = client_instance.crawl_url
            self.batch_scrape_urls = client_instance.batch_scrape_urls
            self.async_batch_scrape_urls = client_instance.async_batch_scrape_urls
            self.async_crawl_url = client_instance.async_crawl_url
            self.check_crawl_status = client_instance.check_crawl_status
            self.map_url = client_instance.map_url
            self.extract = client_instance.extract
            self.deep_research = client_instance.deep_research
            self.generate_llms_text = client_instance.generate_llms_text

class AsyncV2Proxy:
    """Proxy class that forwards method calls to the appropriate version client."""
    _client: Optional[Any] = None

    def __init__(self, client_instance: Optional[Any] = None):
        self._client = client_instance

        if client_instance:
            self.scrape = client_instance.scrape
            self.search = client_instance.search
            self.crawl = client_instance.crawl
            self.start_crawl = client_instance.start_crawl
            self.wait_crawl = client_instance.wait_crawl
            self.get_crawl_status = client_instance.get_crawl_status
            self.cancel_crawl = client_instance.cancel_crawl
            self.get_crawl_errors = client_instance.get_crawl_errors
            self.get_active_crawls = client_instance.get_active_crawls
            self.active_crawls = client_instance.active_crawls
            self.crawl_params_preview = client_instance.crawl_params_preview

            self.extract = client_instance.extract
            self.start_extract = client_instance.start_extract
            self.get_extract_status = client_instance.get_extract_status

            self.start_batch_scrape = client_instance.start_batch_scrape
            self.get_batch_scrape_status = client_instance.get_batch_scrape_status
            self.cancel_batch_scrape = client_instance.cancel_batch_scrape
            self.wait_batch_scrape = client_instance.wait_batch_scrape
            self.batch_scrape = client_instance.batch_scrape
            self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors

            self.map = client_instance.map
            self.get_concurrency = client_instance.get_concurrency
            self.get_credit_usage = client_instance.get_credit_usage
            self.get_token_usage = client_instance.get_token_usage
            self.watcher = client_instance.watcher

    def __getattr__(self, name):
        """Forward attribute access to the underlying client."""
        if self._client:
            return getattr(self._client, name)
        raise AttributeError(f"Async v2 client not implemented yet: {name}")


class Firecrawl:
    """
    Unified Firecrawl client (v2 by default, v1 under ``.v1``).

    Provides a single entrypoint that exposes the latest API directly while
    keeping a feature-frozen v1 available for incremental migration.
    """

    def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.dev"):
        """Initialize the unified client.

        Args:
            api_key: Firecrawl API key (or set ``FIRECRAWL_API_KEY``)
            api_url: Base API URL (defaults to production)
        """
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients
        self._v1_client = V1FirecrawlApp(api_key=api_key, api_url=api_url) if V1FirecrawlApp else None
        self._v2_client = V2FirecrawlClient(api_key=api_key, api_url=api_url) if V2FirecrawlClient else None

        # Create version-specific proxies
        self.v1 = V1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = V2Proxy(self._v2_client)


        self.scrape = self._v2_client.scrape
        self.crawl = self._v2_client.crawl
        self.start_crawl = self._v2_client.start_crawl
        self.crawl_params_preview = self._v2_client.crawl_params_preview
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        self.active_crawls = self._v2_client.active_crawls

        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors
        self.get_extract_status = self._v2_client.get_extract_status
        self.map = self._v2_client.map
        self.search = self._v2_client.search
        self.extract = self._v2_client.extract
        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage
        self.watcher = self._v2_client.watcher

class AsyncFirecrawl:
    """Async unified Firecrawl client (v2 by default, v1 under ``.v1``)."""

    def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.dev"):
        self.api_key = api_key
        self.api_url = api_url

        # Initialize version-specific clients
        self._v1_client = AsyncV1FirecrawlApp(api_key=api_key, api_url=api_url) if AsyncV1FirecrawlApp else None
        self._v2_client = AsyncFirecrawlClient(api_key=api_key, api_url=api_url) if AsyncFirecrawlClient else None

        # Create version-specific proxies
        self.v1 = AsyncV1Proxy(self._v1_client) if self._v1_client else None
        self.v2 = AsyncV2Proxy(self._v2_client)

        # Expose v2 async surface directly on the top-level client for ergonomic access
        # Keep method names aligned with the sync client
        self.scrape = self._v2_client.scrape
        self.search = self._v2_client.search
        self.map = self._v2_client.map

        self.start_crawl = self._v2_client.start_crawl
        self.get_crawl_status = self._v2_client.get_crawl_status
        self.cancel_crawl = self._v2_client.cancel_crawl
        self.crawl = self._v2_client.crawl
        self.get_crawl_errors = self._v2_client.get_crawl_errors
        self.active_crawls = self._v2_client.active_crawls
        self.crawl_params_preview = self._v2_client.crawl_params_preview

        self.start_batch_scrape = self._v2_client.start_batch_scrape
        self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
        self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
        self.batch_scrape = self._v2_client.batch_scrape
        self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors

        self.start_extract = self._v2_client.start_extract
        self.get_extract_status = self._v2_client.get_extract_status
        self.extract = self._v2_client.extract

        self.get_concurrency = self._v2_client.get_concurrency
        self.get_credit_usage = self._v2_client.get_credit_usage
        self.get_token_usage = self._v2_client.get_token_usage

        self.watcher = self._v2_client.watcher

# Export Firecrawl as an alias for FirecrawlApp
FirecrawlApp = Firecrawl
AsyncFirecrawlApp = AsyncFirecrawl
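Taken together, client.py is a thin aggregation layer: the v2 methods are bound directly onto the top-level object, while the `.v1` and `.v2` proxies keep version-pinned access available. A minimal usage sketch under that reading (method names are taken from the bindings above; the URL and API key are placeholders):

import asyncio
from firecrawl import Firecrawl, AsyncFirecrawl

client = Firecrawl(api_key="your-api-key")

doc = client.scrape("https://example.com")              # v2 surface, exposed directly
results = client.v2.search("firecrawl")                 # same surface, version-pinned
legacy = client.v1.scrape_url("https://example.com")    # feature-frozen v1, for migration

async def main():
    aclient = AsyncFirecrawl(api_key="your-api-key")
    # The async client mirrors the sync method names; each call is awaitable
    return await aclient.scrape("https://example.com")

asyncio.run(main())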
firecrawl/{firecrawl.py → firecrawl.backup.py}

@@ -145,7 +145,7 @@ class ScrapeOptions(pydantic.BaseModel):
     excludeTags: Optional[List[str]] = None
     onlyMainContent: Optional[bool] = None
     waitFor: Optional[int] = None
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000
     location: Optional[LocationConfig] = None
     mobile: Optional[bool] = None
     skipTlsVerification: Optional[bool] = None

@@ -258,6 +258,7 @@ class CrawlParams(pydantic.BaseModel):
     maxDiscoveryDepth: Optional[int] = None
     limit: Optional[int] = None
     allowBackwardLinks: Optional[bool] = None
+    crawlEntireDomain: Optional[bool] = None
     allowExternalLinks: Optional[bool] = None
     ignoreSitemap: Optional[bool] = None
     scrapeOptions: Optional[ScrapeOptions] = None

@@ -299,7 +300,7 @@ class MapParams(pydantic.BaseModel):
     includeSubdomains: Optional[bool] = None
     sitemapOnly: Optional[bool] = None
     limit: Optional[int] = None
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000
     useIndex: Optional[bool] = None

 class MapResponse(pydantic.BaseModel):

@@ -463,7 +464,7 @@ class FirecrawlApp:
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -591,7 +592,7 @@ class FirecrawlApp:
     f'{self.api_url}/v1/scrape',
     headers=_headers,
     json=scrape_params,
-    timeout=(timeout +
+    timeout=(timeout / 1000.0 + 5 if timeout is not None else None)
 )

 if response.status_code == 200:

@@ -618,7 +619,7 @@ class FirecrawlApp:
     lang: Optional[str] = None,
     country: Optional[str] = None,
     location: Optional[str] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     scrape_options: Optional[ScrapeOptions] = None,
     **kwargs) -> SearchResponse:
     """

@@ -1159,6 +1160,7 @@ class FirecrawlApp:
     max_discovery_depth=max_discovery_depth,
     limit=limit,
     allow_backward_links=allow_backward_links,
+    crawl_entire_domain=crawl_entire_domain,
     allow_external_links=allow_external_links,
     ignore_sitemap=ignore_sitemap,
     scrape_options=scrape_options,

@@ -1187,7 +1189,7 @@ class FirecrawlApp:
     include_subdomains: Optional[bool] = None,
     sitemap_only: Optional[bool] = None,
     limit: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     use_index: Optional[bool] = None,
     **kwargs) -> MapResponse:
     """

@@ -1278,7 +1280,7 @@ class FirecrawlApp:
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -1419,7 +1421,7 @@ class FirecrawlApp:
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -1559,7 +1561,7 @@ class FirecrawlApp:
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -2202,7 +2204,7 @@ class FirecrawlApp:
     requests.RequestException: If the request fails after the specified retries.
     """
     for attempt in range(retries):
-        response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] +
+        response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] / 1000.0 + 5) if "timeout" in data and data["timeout"] is not None else None))
         if response.status_code == 502:
             time.sleep(backoff_factor * (2 ** attempt))
         else:

@@ -2985,7 +2987,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -3123,7 +3125,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -3262,7 +3264,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
     exclude_tags: Optional[List[str]] = None,
     only_main_content: Optional[bool] = None,
     wait_for: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     location: Optional[LocationConfig] = None,
     mobile: Optional[bool] = None,
     skip_tls_verification: Optional[bool] = None,

@@ -3747,7 +3749,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
     include_subdomains: Optional[bool] = None,
     sitemap_only: Optional[bool] = None,
     limit: Optional[int] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     params: Optional[MapParams] = None) -> MapResponse:
     """
     Asynchronously map and discover links from a URL.

@@ -4481,7 +4483,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
     lang: Optional[str] = None,
     country: Optional[str] = None,
     location: Optional[str] = None,
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000,
     scrape_options: Optional[ScrapeOptions] = None,
     params: Optional[Union[Dict[str, Any], SearchParams]] = None,
     **kwargs) -> SearchResponse:
firecrawl/types.py
ADDED
@@ -0,0 +1,157 @@

"""
Unified Firecrawl Types

This module provides unified access to Firecrawl types across all API versions.
Currently exports v2 types as the primary interface.
"""

from .v2.types import (
    # Base types
    BaseResponse,

    # Document types
    Document,
    DocumentMetadata,

    # Scrape types
    ScrapeFormats,
    ScrapeOptions,
    ScrapeRequest,
    ScrapeData,
    ScrapeResponse,

    # Crawl types
    CrawlRequest,
    CrawlJob,
    CrawlResponse,
    CrawlParamsRequest,
    CrawlParamsData,
    CrawlParamsResponse,
    CrawlErrorsResponse,
    ActiveCrawlsResponse,

    # Batch scrape types
    BatchScrapeRequest,
    BatchScrapeJob,
    BatchScrapeResponse,

    # Map types
    MapOptions,
    MapRequest,
    MapData,
    MapResponse,

    # Search types
    Source,
    SourceOption,
    Format,
    JsonFormat,
    FormatOption,
    SearchRequest,
    SearchResult,
    SearchData,
    SearchResponse,

    # Action types
    WaitAction,
    ScreenshotAction,
    ClickAction,
    WriteAction,
    PressAction,
    ScrollAction,
    ScrapeAction,
    ExecuteJavascriptAction,
    PDFAction,

    # Location and format types
    Location,

    # Error types
    ErrorDetails,
    ErrorResponse,

    # Job management types
    JobStatus,

    # Webhook types
    WebhookData,

    # Configuration types
    ClientConfig,
)

__all__ = [
    # Base types
    'BaseResponse',

    # Document types
    'Document',
    'DocumentMetadata',

    # Scrape types
    'ScrapeFormats',
    'ScrapeOptions',
    'ScrapeRequest',
    'ScrapeData',
    'ScrapeResponse',

    # Crawl types
    'CrawlRequest',
    'CrawlJob',
    'CrawlJobData',
    'CrawlResponse',
    'CrawlParamsRequest',
    'CrawlParamsData',
    'CrawlParamsResponse',
    'CrawlErrorsResponse',
    'ActiveCrawlsResponse',

    # Batch scrape types
    'BatchScrapeRequest',
    'BatchScrapeJob',
    'BatchScrapeResponse',

    # Map types
    'MapOptions',
    'MapRequest',
    'MapData',
    'MapResponse',

    # Search types
    'Source',
    'SourceOption',
    'Format',
    'JsonFormat',
    'FormatOption',
    'SearchRequest',
    'SearchResult',
    'SearchData',
    'SearchResponse',

    # Action types
    'WaitAction',
    'ScreenshotAction',
    'ClickAction',
    'WriteAction',
    'PressAction',
    'ScrollAction',
    'ScrapeAction',
    'ExecuteJavascriptAction',
    'PDFAction',

    # Location and format types
    'Location',

    # Error types
    'ErrorDetails',
    'ErrorResponse',

    # Job management types
    'JobStatus',

    # Webhook types
    'WebhookData',

    # Configuration types
    'ClientConfig',
]
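Since types.py is a pure re-export layer, downstream code gets one stable import path for the models regardless of which API version defines them. A small sketch, assuming nothing beyond the re-exports listed above:

from firecrawl.types import Document, ScrapeOptions, SearchRequest

# Names imported here resolve to the v2 model definitions under the hood
print(ScrapeOptions.__module__)  # expected: firecrawl.v2.types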
firecrawl/v1/__init__.py
ADDED
@@ -0,0 +1,14 @@

"""
Firecrawl v1 API (Legacy)

This module provides the legacy v1 API for backward compatibility.

Usage:
    from firecrawl.v1 import V1FirecrawlApp
    app = V1FirecrawlApp(api_key="your-api-key")
    result = app.scrape_url("https://example.com")
"""

from .client import V1FirecrawlApp, AsyncV1FirecrawlApp, V1JsonConfig, V1ScrapeOptions, V1ChangeTrackingOptions

__all__ = ['V1FirecrawlApp', 'AsyncV1FirecrawlApp', 'V1JsonConfig', 'V1ScrapeOptions', 'V1ChangeTrackingOptions']
|