firecrawl 2.16.2__tar.gz → 2.16.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl has been flagged as potentially problematic.
- {firecrawl-2.16.2 → firecrawl-2.16.5}/PKG-INFO +1 -1
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/__init__.py +1 -1
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/firecrawl.py +94 -77
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.16.2 → firecrawl-2.16.5}/LICENSE +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/README.md +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/pyproject.toml +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/setup.cfg +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/setup.py +0 -0
- {firecrawl-2.16.2 → firecrawl-2.16.5}/tests/test_change_tracking.py +0 -0
firecrawl/__init__.py

@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.16.2"
+__version__ = "2.16.5"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py

@@ -24,12 +24,6 @@ import aiohttp
 import asyncio
 from pydantic import Field
 
-# Suppress Pydantic warnings about attribute shadowing
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
 def get_version():
     try:
@@ -106,7 +100,7 @@ class ChangeTrackingData(pydantic.BaseModel):
     changeStatus: str # "new" | "same" | "changed" | "removed"
     visibility: str # "visible" | "hidden"
     diff: Optional[Dict[str, Any]] = None
-    json: Optional[Any] = None
+    json_field: Optional[Any] = pydantic.Field(None, alias='json')
 
 class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     """Document retrieved or processed by Firecrawl."""
@@ -116,7 +110,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     rawHtml: Optional[str] = None
     links: Optional[List[str]] = None
     extract: Optional[T] = None
-    json: Optional[T] = None
+    json_field: Optional[T] = pydantic.Field(None, alias='json')
     screenshot: Optional[str] = None
     metadata: Optional[Any] = None
     actions: Optional[ActionsResult] = None
@@ -139,7 +133,7 @@ class WebhookConfig(pydantic.BaseModel):
 class ChangeTrackingOptions(pydantic.BaseModel):
     """Configuration for change tracking."""
     modes: Optional[List[Literal["git-diff", "json"]]] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     prompt: Optional[str] = None
     tag: Optional[str] = None
 
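These model hunks are the heart of the release. In Pydantic v1, a field literally named `json` or `schema` shadows `BaseModel.json()` / `BaseModel.schema()`, which is exactly what the deleted `warnings.filterwarnings` calls in the import hunk were suppressing. Renaming the attribute and keeping the old name as an alias removes the shadowing without changing the wire format. A minimal sketch of the pattern, assuming Pydantic v1 semantics (the `Example` model and its `Config` are illustrative, not the SDK's exact code):

```python
from typing import Any, Optional
import pydantic  # Pydantic v1 API, as used by the SDK


class Example(pydantic.BaseModel):
    # Attribute renamed to avoid shadowing BaseModel.json();
    # alias='json' keeps the original key on the wire.
    json_field: Optional[Any] = pydantic.Field(None, alias='json')

    class Config:
        # Illustrative: lets callers populate via either name.
        allow_population_by_field_name = True


doc = Example(json={"price": 9.99})   # populate through the alias
print(doc.json_field)                 # {'price': 9.99}
print(doc.dict(by_alias=True))        # {'json': {'price': 9.99}}  <- API-facing shape
print(doc.dict())                     # {'json_field': {'price': 9.99}}
```

One visible consequence: callers that previously read `document.json` now read `document.json_field`, while the JSON exchanged with the API is unchanged.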
@@ -219,7 +213,7 @@ class ExtractAgent(pydantic.BaseModel):
 class JsonConfig(pydantic.BaseModel):
     """Configuration for extraction."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     agent: Optional[ExtractAgent] = None
 
@@ -317,7 +311,7 @@ class MapResponse(pydantic.BaseModel):
 class ExtractParams(pydantic.BaseModel):
     """Parameters for extracting information from URLs."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     allowExternalLinks: Optional[bool] = None
     enableWebSearch: Optional[bool] = None
@@ -431,7 +425,7 @@ class ExtractParams(pydantic.BaseModel):
     Parameters for the extract operation.
     """
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     system_prompt: Optional[str] = None
     allow_external_links: Optional[bool] = False
     enable_web_search: Optional[bool] = False
@@ -484,6 +478,7 @@ class FirecrawlApp:
         max_age: Optional[int] = None,
         store_in_cache: Optional[bool] = None,
         zero_data_retention: Optional[bool] = None,
+        agent: Optional[AgentOptions] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -508,6 +503,7 @@ class FirecrawlApp:
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
             change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
             zero_data_retention (Optional[bool]): Whether to delete data after scrape is done
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model
 
 
         Returns:
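The two hunks above thread a new `agent` keyword through the synchronous `scrape_url` signature and its docstring. A hedged usage sketch follows; the `model` field on `AgentOptions` is an assumption based on the docstring's mention of FIRE-1, so check the model definition in `firecrawl/firecrawl.py` for the real fields:

```python
from firecrawl import FirecrawlApp
from firecrawl.firecrawl import AgentOptions  # defined in the module; not re-exported in __init__

app = FirecrawlApp(api_key="fc-YOUR-KEY")
result = app.scrape_url(
    "https://example.com",
    formats=["markdown"],
    agent=AgentOptions(model="FIRE-1"),  # assumed field name, per the docstring
)
print(result.metadata)  # FirecrawlDocument.metadata, per the model hunk above
```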
@@ -520,6 +516,9 @@ class FirecrawlApp:
         Raises:
             Exception: If scraping fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "scrape_url")
+
         _headers = self._prepare_headers()
 
         # Build scrape parameters
@@ -544,7 +543,7 @@ class FirecrawlApp:
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -561,22 +560,24 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if change_tracking_options:
-            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
+            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(by_alias=True, exclude_none=True)
         if max_age is not None:
             scrape_params['maxAge'] = max_age
         if store_in_cache is not None:
             scrape_params['storeInCache'] = store_in_cache
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
 
         scrape_params.update(kwargs)
 
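The `by_alias=True` sprinkled through this hunk (and repeated at every `.dict(...)` call site in the rest of the diff) is the other half of the field renames: without it, Pydantic v1 serializes the renamed attributes under their Python names, and the API would receive `schema_field` instead of `schema`. A stand-in model shows the difference (sketch only; the real `JsonConfig` has more fields):

```python
from typing import Any, Optional
import pydantic


class JsonConfig(pydantic.BaseModel):  # stand-in mirroring the diff's field definitions
    prompt: Optional[str] = None
    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')


cfg = JsonConfig(prompt="Extract prices", schema={"type": "object"})

print(cfg.dict(exclude_none=True))
# {'prompt': 'Extract prices', 'schema_field': {'type': 'object'}}  <- wrong wire key
print(cfg.dict(by_alias=True, exclude_none=True))
# {'prompt': 'Extract prices', 'schema': {'type': 'object'}}        <- what the API expects
```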
@@ -667,7 +668,7 @@ class FirecrawlApp:
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         search_params.update(kwargs)
@@ -675,7 +676,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"
 
         if _integration:
@@ -789,7 +790,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -812,7 +813,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -918,7 +919,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -940,7 +941,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1239,7 +1240,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1351,7 +1352,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1366,16 +1367,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1386,7 +1387,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1492,7 +1493,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1507,16 +1508,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1527,7 +1528,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -1628,7 +1629,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1643,16 +1644,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1663,7 +1664,7 @@ class FirecrawlApp:
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -2092,7 +2093,7 @@ class FirecrawlApp:
         )
 
         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -2333,10 +2334,22 @@ class FirecrawlApp:
             Exception: An exception with a message containing the status code and error details from the response.
         """
         try:
-            error_message = response.json().get('error', 'No error message provided.')
-            error_details = response.json().get('details', 'No additional error details provided.')
+            response_json = response.json()
+            error_message = response_json.get('error', 'No error message provided.')
+            error_details = response_json.get('details', 'No additional error details provided.')
         except:
-            raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
+            # If we can't parse JSON, provide a helpful error message with response content
+            try:
+                response_text = response.text[:500] # Limit to first 500 chars
+                if response_text.strip():
+                    error_message = f"Server returned non-JSON response: {response_text}"
+                    error_details = f"Full response status: {response.status_code}"
+                else:
+                    error_message = f"Server returned empty response with status {response.status_code}"
+                    error_details = "No additional details available"
+            except ValueError:
+                error_message = f"Server returned unreadable response with status {response.status_code}"
+                error_details = "No additional details available"
 
         message = self._get_error_message(response.status_code, action, error_message, error_details)
 
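The rewritten `except` branch above matters when the server (or an intermediary proxy) answers with an HTML error page or an empty body instead of JSON; previously that path surfaced only a generic parse failure. A small illustration of the fallback behavior with a stand-in response object (hypothetical; the SDK operates on real `requests.Response` instances):

```python
class FakeResponse:  # stand-in for a proxy's non-JSON error reply
    status_code = 502
    text = "<html><body>Bad Gateway</body></html>"

    def json(self):
        raise ValueError("not JSON")


resp = FakeResponse()
try:
    body = resp.json()
    error_message = body.get('error', 'No error message provided.')
except Exception:
    snippet = resp.text[:500]  # cap the echoed body, as the diff does
    if snippet.strip():
        error_message = f"Server returned non-JSON response: {snippet}"
    else:
        error_message = f"Server returned empty response with status {resp.status_code}"

print(error_message)  # Server returned non-JSON response: <html>...
```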
@@ -2359,7 +2372,7 @@ class FirecrawlApp:
         if status_code == 402:
             return f"Payment Required: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 403:
-            return f"Forbidden: Failed to {action}. {error_message} - {error_details}"
+            return f"Website Not Supported: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 408:
             return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
         elif status_code == 409:
@@ -2513,7 +2526,7 @@ class FirecrawlApp:
 
         headers = self._prepare_headers()
 
-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         # Handle json options schema if present
@@ -2597,7 +2610,7 @@ class FirecrawlApp:
         method_params = {
             "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                            "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "integration"},
+                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "agent", "integration"},
             "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
             "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                           "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
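This hunk extends the `scrape_url` allow-list with `max_age` and `agent`, which is what lets the new `self._validate_kwargs(kwargs, "scrape_url")` call earlier in the diff accept those keywords. A sketch of the allow-list validation pattern (the SDK's actual `_validate_kwargs` may differ in details):

```python
METHOD_PARAMS = {
    "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content",
                   "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
                   "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
                   "actions", "change_tracking_options", "max_age", "agent", "integration"},
}


def validate_kwargs(kwargs: dict, method_name: str) -> None:
    """Fail fast on typos instead of silently sending unknown parameters."""
    allowed = METHOD_PARAMS.get(method_name, set())
    unknown = set(kwargs) - allowed
    if unknown:
        raise ValueError(
            f"Unsupported parameter(s) for {method_name}: {', '.join(sorted(unknown))}. "
            f"Supported: {', '.join(sorted(allowed))}"
        )


validate_kwargs({"max_age": 3600}, "scrape_url")    # passes
# validate_kwargs({"maxAge": 3600}, "scrape_url")   # raises: unknown 'maxAge'
```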
@@ -2983,6 +2996,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         extract: Optional[JsonConfig] = None,
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
+        agent: Optional[AgentOptions] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape a single URL asynchronously.
@@ -3005,6 +3019,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model
             **kwargs: Additional parameters to pass to the API
 
         Returns:
@@ -3049,7 +3064,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3066,14 +3081,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
             scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
         if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
@@ -3177,7 +3194,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3192,22 +3209,23 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
-
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
+        if actions is not None:
+            scrape_params['actions'] = [action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3316,7 +3334,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3331,16 +3349,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention
 
@@ -3349,7 +3367,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3457,7 +3475,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3476,7 +3494,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
         # Make request
@@ -3572,7 +3590,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3591,7 +3609,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -3757,7 +3775,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         """
         map_params = {}
         if params:
-            map_params.update(params.dict(exclude_none=True))
+            map_params.update(params.dict(by_alias=True, exclude_none=True))
 
         # Add individual parameters
         if search is not None:
@@ -3775,7 +3793,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
@@ -4159,7 +4177,6 @@ class AsyncFirecrawlApp(FirecrawlApp):
             url,
             max_urls=max_urls,
             show_full_text=show_full_text,
-            cache=cache,
             experimental_stream=experimental_stream
         )
         if not response.get('success') or 'id' not in response:
@@ -4223,7 +4240,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         )
 
         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -4408,7 +4425,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
 
         headers = self._prepare_headers()
 
-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
 
         try:
@@ -4500,7 +4517,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if isinstance(params, dict):
             search_params.update(params)
         else:
-            search_params.update(params.dict(exclude_none=True))
+            search_params.update(params.dict(by_alias=True, exclude_none=True))
 
         # Add individual parameters
         if limit is not None:
@@ -4518,14 +4535,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
 
         # Add any additional kwargs
         search_params.update(kwargs)
 
         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"
 
         return await self._async_post_request(