firecrawl-py 2.16.3__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +27 -19
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
- firecrawl/__tests__/e2e/v2/test_map.py +60 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
- firecrawl/__tests__/e2e/v2/test_search.py +265 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +241 -0
- build/lib/firecrawl/firecrawl.py → firecrawl/firecrawl.backup.py +108 -92
- firecrawl/types.py +157 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/{firecrawl.py → v1/client.py} +405 -371
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +802 -0
- firecrawl/v2/client_async.py +250 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/batch.py +85 -0
- firecrawl/v2/methods/aio/crawl.py +174 -0
- firecrawl/v2/methods/aio/extract.py +126 -0
- firecrawl/v2/methods/aio/map.py +59 -0
- firecrawl/v2/methods/aio/scrape.py +36 -0
- firecrawl/v2/methods/aio/search.py +58 -0
- firecrawl/v2/methods/aio/usage.py +42 -0
- firecrawl/v2/methods/batch.py +420 -0
- firecrawl/v2/methods/crawl.py +468 -0
- firecrawl/v2/methods/extract.py +131 -0
- firecrawl/v2/methods/map.py +77 -0
- firecrawl/v2/methods/scrape.py +68 -0
- firecrawl/v2/methods/search.py +173 -0
- firecrawl/v2/methods/usage.py +41 -0
- firecrawl/v2/types.py +546 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +153 -0
- firecrawl/v2/utils/http_client_async.py +64 -0
- firecrawl/v2/utils/validation.py +324 -0
- firecrawl/v2/watcher.py +312 -0
- firecrawl/v2/watcher_async.py +245 -0
- {firecrawl_py-2.16.3.dist-info → firecrawl_py-3.0.2.dist-info}/LICENSE +0 -0
- {firecrawl_py-2.16.3.dist-info → firecrawl_py-3.0.2.dist-info}/METADATA +49 -32
- firecrawl_py-3.0.2.dist-info/RECORD +78 -0
- {firecrawl_py-2.16.3.dist-info → firecrawl_py-3.0.2.dist-info}/top_level.txt +0 -2
- tests/test_timeout_conversion.py +117 -0
- build/lib/firecrawl/__init__.py +0 -79
- build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- build/lib/firecrawl/__tests__/e2e_withAuth/test.py +0 -170
- build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
- build/lib/tests/test_change_tracking.py +0 -98
- firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/e2e_withAuth/test.py +0 -170
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
- firecrawl_py-2.16.3.dist-info/RECORD +0 -19
- {firecrawl_py-2.16.3.dist-info → firecrawl_py-3.0.2.dist-info}/WHEEL +0 -0
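
The structural story of this release: the 2.x client module is preserved as firecrawl/v1/client.py (and as firecrawl/firecrawl.backup.py), a new v2 implementation with sync, async, and WebSocket-watcher surfaces lands under firecrawl/v2/, firecrawl/client.py becomes the top-level entry point, and e2e plus unit suites are added for the v2 surface. As a rough orientation sketch of what the new layout implies for callers — the `Firecrawl` class name and method shapes below are assumptions inferred from the file list, not confirmed by this diff:

```python
# Hypothetical usage of the 3.x layout; names are assumptions based on the
# new firecrawl/client.py and firecrawl/v2/ modules, which this diff does not show.
from firecrawl import Firecrawl  # assumed top-level export from firecrawl/client.py

client = Firecrawl(api_key="fc-YOUR-API-KEY")
doc = client.scrape("https://example.com", formats=["markdown"])  # presumably routed via firecrawl/v2/methods/scrape.py
print(doc.markdown)

# The 2.x-era client appears to remain importable for migration via the new
# firecrawl/v1/ package (firecrawl/v1/client.py); exact re-exports are not shown here.
```

The hunks that follow appear to come from the build/lib/firecrawl/firecrawl.py → firecrawl/firecrawl.backup.py entry (+108 −92) above.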
@@ -24,12 +24,6 @@ import aiohttp
 import asyncio
 from pydantic import Field
 
-# Suppress Pydantic warnings about attribute shadowing
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
 def get_version():
     try:
@@ -106,7 +100,7 @@ class ChangeTrackingData(pydantic.BaseModel):
     changeStatus: str  # "new" | "same" | "changed" | "removed"
     visibility: str  # "visible" | "hidden"
     diff: Optional[Dict[str, Any]] = None
-    json: Optional[Any] = None
+    json_field: Optional[Any] = pydantic.Field(None, alias='json')
 
 class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     """Document retrieved or processed by Firecrawl."""
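
This rename is the mechanism that replaces the deleted `warnings.filterwarnings` calls in the first hunk: rather than letting a field named `json` shadow `BaseModel.json()`, the model stores it as `json_field` and keeps `json` as the wire name via a pydantic alias. A minimal, standalone sketch of the pattern (pydantic v1 style, matching the `.dict()` calls throughout this file; the `Config` line is an assumption about how the real models accept both names):

```python
import pydantic
from typing import Any, Optional

class ChangeTrackingData(pydantic.BaseModel):
    diff: Optional[dict] = None
    # Stored as json_field, serialized/parsed as "json" -- no BaseModel shadowing
    json_field: Optional[Any] = pydantic.Field(None, alias='json')

    class Config:
        allow_population_by_field_name = True  # assumption, not visible in the diff

data = ChangeTrackingData.parse_obj({"json": {"title": "Example"}})
print(data.json_field)                              # {'title': 'Example'}
print(data.dict(by_alias=True, exclude_none=True))  # {'json': {'title': 'Example'}}
```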
@@ -116,7 +110,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     rawHtml: Optional[str] = None
     links: Optional[List[str]] = None
     extract: Optional[T] = None
-    json: Optional[T] = None
+    json_field: Optional[T] = pydantic.Field(None, alias='json')
     screenshot: Optional[str] = None
     metadata: Optional[Any] = None
     actions: Optional[ActionsResult] = None
@@ -139,7 +133,7 @@ class WebhookConfig(pydantic.BaseModel):
 class ChangeTrackingOptions(pydantic.BaseModel):
     """Configuration for change tracking."""
     modes: Optional[List[Literal["git-diff", "json"]]] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     prompt: Optional[str] = None
     tag: Optional[str] = None
 
@@ -151,7 +145,7 @@ class ScrapeOptions(pydantic.BaseModel):
     excludeTags: Optional[List[str]] = None
     onlyMainContent: Optional[bool] = None
     waitFor: Optional[int] = None
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000
     location: Optional[LocationConfig] = None
     mobile: Optional[bool] = None
     skipTlsVerification: Optional[bool] = None
@@ -219,7 +213,7 @@ class ExtractAgent(pydantic.BaseModel):
 class JsonConfig(pydantic.BaseModel):
     """Configuration for extraction."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     agent: Optional[ExtractAgent] = None
 
@@ -264,6 +258,7 @@ class CrawlParams(pydantic.BaseModel):
     maxDiscoveryDepth: Optional[int] = None
     limit: Optional[int] = None
     allowBackwardLinks: Optional[bool] = None
+    crawlEntireDomain: Optional[bool] = None
     allowExternalLinks: Optional[bool] = None
     ignoreSitemap: Optional[bool] = None
     scrapeOptions: Optional[ScrapeOptions] = None
@@ -305,7 +300,7 @@ class MapParams(pydantic.BaseModel):
     includeSubdomains: Optional[bool] = None
     sitemapOnly: Optional[bool] = None
     limit: Optional[int] = None
-    timeout: Optional[int] =
+    timeout: Optional[int] = 30000
     useIndex: Optional[bool] = None
 
 class MapResponse(pydantic.BaseModel):
@@ -317,7 +312,7 @@ class MapResponse(pydantic.BaseModel):
 class ExtractParams(pydantic.BaseModel):
     """Parameters for extracting information from URLs."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     allowExternalLinks: Optional[bool] = None
     enableWebSearch: Optional[bool] = None
@@ -431,7 +426,7 @@ class ExtractParams(pydantic.BaseModel):
     Parameters for the extract operation.
     """
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     system_prompt: Optional[str] = None
     allow_external_links: Optional[bool] = False
     enable_web_search: Optional[bool] = False
@@ -469,7 +464,7 @@ class FirecrawlApp:
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -484,6 +479,7 @@ class FirecrawlApp:
         max_age: Optional[int] = None,
         store_in_cache: Optional[bool] = None,
         zero_data_retention: Optional[bool] = None,
+        agent: Optional[AgentOptions] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -508,6 +504,7 @@ class FirecrawlApp:
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
             change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
             zero_data_retention (Optional[bool]): Whether to delete data after scrape is done
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model
 
 
         Returns:
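
`agent` is threaded through `scrape_url` here and through the batch and async variants below, always serialized as `agent.dict(by_alias=True, exclude_none=True)`. A hedged call sketch — the `AgentOptions` constructor and its `model` field are assumptions; the diff shows only the parameter's type annotation and the FIRE-1 docstring reference:

```python
# Hypothetical call; AgentOptions fields are assumed, not shown in this diff.
from firecrawl import FirecrawlApp, AgentOptions  # 2.x-style imports

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
result = app.scrape_url(
    "https://example.com/pricing",
    formats=["markdown"],
    agent=AgentOptions(model="FIRE-1"),  # assumed field name, per the docstring's FIRE-1 mention
)
```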
@@ -547,7 +544,7 @@ class FirecrawlApp:
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -564,22 +561,24 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if change_tracking_options:
-            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
+            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(by_alias=True, exclude_none=True)
         if max_age is not None:
             scrape_params['maxAge'] = max_age
         if store_in_cache is not None:
             scrape_params['storeInCache'] = store_in_cache
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
 
         scrape_params.update(kwargs)
 
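
The other change repeated throughout this hunk (and at dozens of identical sites below, in both `FirecrawlApp` and `AsyncFirecrawlApp`) is `.dict(exclude_none=True)` becoming `.dict(by_alias=True, exclude_none=True)`. That flag is the required companion to the field renames above: without it, pydantic emits the Python attribute names rather than the wire names. A minimal illustration:

```python
import pydantic
from typing import Any, Optional

class JsonConfig(pydantic.BaseModel):
    prompt: Optional[str] = None
    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')

cfg = JsonConfig.parse_obj({"prompt": "Extract the page title", "schema": {"type": "object"}})

print(cfg.dict(exclude_none=True))
# {'prompt': 'Extract the page title', 'schema_field': {'type': 'object'}}  <- key the API would not recognize
print(cfg.dict(by_alias=True, exclude_none=True))
# {'prompt': 'Extract the page title', 'schema': {'type': 'object'}}        <- intended payload
```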
@@ -593,7 +592,7 @@ class FirecrawlApp:
             f'{self.api_url}/v1/scrape',
             headers=_headers,
             json=scrape_params,
-            timeout=(timeout +
+            timeout=(timeout / 1000.0 + 5 if timeout is not None else None)
         )
 
         if response.status_code == 200:
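
This hunk is the actual timeout bug fix: the SDK-level `timeout` is expressed in milliseconds (hence the new 30000 defaults), but `requests` interprets its `timeout` argument in seconds, so the value is divided by 1000 and padded with a 5-second buffer so the HTTP client outlives the server-side deadline. The new tests/test_timeout_conversion.py in the file list presumably pins this behavior. The conversion, isolated as a sketch:

```python
from typing import Optional

def to_request_timeout(timeout_ms: Optional[int]) -> Optional[float]:
    """Millisecond API timeout -> seconds for requests, plus a 5 s buffer.
    Mirrors the expression introduced in this hunk."""
    return timeout_ms / 1000.0 + 5 if timeout_ms is not None else None

assert to_request_timeout(30000) == 35.0  # default 30 s deadline -> 35 s HTTP timeout
assert to_request_timeout(None) is None   # no deadline -> requests waits indefinitely
```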
@@ -620,7 +619,7 @@ class FirecrawlApp:
         lang: Optional[str] = None,
         country: Optional[str] = None,
         location: Optional[str] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         scrape_options: Optional[ScrapeOptions] = None,
         **kwargs) -> SearchResponse:
         """
@@ -670,7 +669,7 @@ class FirecrawlApp:
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         search_params.update(kwargs)
@@ -678,7 +677,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"
 
         if _integration:
@@ -792,7 +791,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -815,7 +814,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -921,7 +920,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -943,7 +942,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1161,6 +1160,7 @@ class FirecrawlApp:
             max_discovery_depth=max_discovery_depth,
             limit=limit,
             allow_backward_links=allow_backward_links,
+            crawl_entire_domain=crawl_entire_domain,
             allow_external_links=allow_external_links,
             ignore_sitemap=ignore_sitemap,
             scrape_options=scrape_options,
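
With `crawlEntireDomain` added to `CrawlParams` earlier and the snake_case `crawl_entire_domain` forwarded here, the crawl entry points gain a domain-wide toggle. A hedged usage sketch, reusing `app` from the sketch above (the semantics are inferred from the name; the diff shows only the plumbing):

```python
# Hypothetical call; crawl_entire_domain semantics inferred from the parameter name.
status = app.crawl_url(
    "https://example.com/blog",
    limit=100,
    crawl_entire_domain=True,  # serialized as crawlEntireDomain in CrawlParams
)
```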
@@ -1189,7 +1189,7 @@ class FirecrawlApp:
         include_subdomains: Optional[bool] = None,
         sitemap_only: Optional[bool] = None,
         limit: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         use_index: Optional[bool] = None,
         **kwargs) -> MapResponse:
         """
@@ -1242,7 +1242,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1280,7 +1280,7 @@ class FirecrawlApp:
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -1354,7 +1354,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1369,16 +1369,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1389,7 +1389,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1421,7 +1421,7 @@ class FirecrawlApp:
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -1495,7 +1495,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1510,16 +1510,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1530,7 +1530,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1561,7 +1561,7 @@ class FirecrawlApp:
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -1631,7 +1631,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1646,16 +1646,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1666,7 +1666,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -2095,7 +2095,7 @@ class FirecrawlApp:
         )
 
         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -2204,7 +2204,7 @@ class FirecrawlApp:
             requests.RequestException: If the request fails after the specified retries.
         """
         for attempt in range(retries):
-            response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] +
+            response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] / 1000.0 + 5) if "timeout" in data and data["timeout"] is not None else None))
            if response.status_code == 502:
                 time.sleep(backoff_factor * (2 ** attempt))
             else:
@@ -2336,10 +2336,22 @@ class FirecrawlApp:
             Exception: An exception with a message containing the status code and error details from the response.
         """
         try:
-
-
+            response_json = response.json()
+            error_message = response_json.get('error', 'No error message provided.')
+            error_details = response_json.get('details', 'No additional error details provided.')
         except:
-
+            # If we can't parse JSON, provide a helpful error message with response content
+            try:
+                response_text = response.text[:500]  # Limit to first 500 chars
+                if response_text.strip():
+                    error_message = f"Server returned non-JSON response: {response_text}"
+                    error_details = f"Full response status: {response.status_code}"
+                else:
+                    error_message = f"Server returned empty response with status {response.status_code}"
+                    error_details = "No additional details available"
+            except ValueError:
+                error_message = f"Server returned unreadable response with status {response.status_code}"
+                error_details = "No additional details available"
 
         message = self._get_error_message(response.status_code, action, error_message, error_details)
 
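
The rewritten handler above stops assuming error responses are JSON: it first tries `response.json()`, then falls back to the first 500 characters of the raw body, then to a status-only message. The same defensive pattern, isolated as a standalone sketch around a `requests.Response`:

```python
import requests

def describe_error(response: requests.Response) -> str:
    """Summarize an API error without assuming the body is JSON."""
    try:
        body = response.json()
        error_message = body.get('error', 'No error message provided.')
        error_details = body.get('details', 'No additional error details provided.')
    except ValueError:  # non-JSON or empty body; requests' JSONDecodeError subclasses ValueError
        text = response.text[:500]  # cap the body, as the diff does
        if text.strip():
            error_message = f"Server returned non-JSON response: {text}"
        else:
            error_message = f"Server returned empty response with status {response.status_code}"
        error_details = f"Full response status: {response.status_code}"
    return f"{error_message} - {error_details}"
```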
@@ -2362,7 +2374,7 @@ class FirecrawlApp:
         if status_code == 402:
             return f"Payment Required: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 403:
-
+            return f"Website Not Supported: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 408:
             return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
         elif status_code == 409:
@@ -2516,7 +2528,7 @@ class FirecrawlApp:
 
         headers = self._prepare_headers()
 
-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         # Handle json options schema if present
@@ -2600,7 +2612,7 @@ class FirecrawlApp:
         method_params = {
             "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                            "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "integration"},
+                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "agent", "integration"},
             "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
             "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                           "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
@@ -2975,7 +2987,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -2986,6 +2998,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         extract: Optional[JsonConfig] = None,
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
+        agent: Optional[AgentOptions] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape a single URL asynchronously.
@@ -3008,6 +3021,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model
             **kwargs: Additional parameters to pass to the API
 
         Returns:
@@ -3052,7 +3066,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3069,14 +3083,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
             scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
         if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
@@ -3109,7 +3125,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -3180,7 +3196,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3195,22 +3211,23 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
-
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
+        if actions is not None:
+            scrape_params['actions'] = [action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3247,7 +3264,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
         wait_for: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         location: Optional[LocationConfig] = None,
         mobile: Optional[bool] = None,
         skip_tls_verification: Optional[bool] = None,
@@ -3319,7 +3336,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3334,16 +3351,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention
 
@@ -3352,7 +3369,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3460,7 +3477,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3479,7 +3496,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
         # Make request
@@ -3575,7 +3592,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3594,7 +3611,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3732,7 +3749,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         include_subdomains: Optional[bool] = None,
         sitemap_only: Optional[bool] = None,
         limit: Optional[int] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         params: Optional[MapParams] = None) -> MapResponse:
         """
         Asynchronously map and discover links from a URL.
@@ -3760,7 +3777,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         """
         map_params = {}
         if params:
-            map_params.update(params.dict(exclude_none=True))
+            map_params.update(params.dict(by_alias=True, exclude_none=True))
 
         # Add individual parameters
         if search is not None:
@@ -3778,7 +3795,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -4162,7 +4179,6 @@ class AsyncFirecrawlApp(FirecrawlApp):
             url,
             max_urls=max_urls,
             show_full_text=show_full_text,
-            cache=cache,
             experimental_stream=experimental_stream
         )
         if not response.get('success') or 'id' not in response:
@@ -4226,7 +4242,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         )
 
         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
        json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -4411,7 +4427,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         headers = self._prepare_headers()
 
-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -4467,7 +4483,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         lang: Optional[str] = None,
         country: Optional[str] = None,
         location: Optional[str] = None,
-        timeout: Optional[int] =
+        timeout: Optional[int] = 30000,
         scrape_options: Optional[ScrapeOptions] = None,
         params: Optional[Union[Dict[str, Any], SearchParams]] = None,
         **kwargs) -> SearchResponse:
@@ -4503,7 +4519,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if isinstance(params, dict):
             search_params.update(params)
         else:
-            search_params.update(params.dict(exclude_none=True))
+            search_params.update(params.dict(by_alias=True, exclude_none=True))
 
         # Add individual parameters
         if limit is not None:
@@ -4521,14 +4537,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         search_params.update(kwargs)
 
         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"
 
         return await self._async_post_request(