firecrawl 2.16.3__py3-none-any.whl → 2.16.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl might be problematic.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +91 -77
- {firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/METADATA +1 -1
- {firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/RECORD +7 -7
- {firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/LICENSE +0 -0
- {firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/WHEEL +0 -0
- {firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED

@@ -13,7 +13,7 @@ import os

 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa

-__version__ = "2.16.3"
+__version__ = "2.16.5"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py
CHANGED

@@ -24,12 +24,6 @@ import aiohttp
 import asyncio
 from pydantic import Field

-# Suppress Pydantic warnings about attribute shadowing
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")

 def get_version():
     try:
@@ -106,7 +100,7 @@ class ChangeTrackingData(pydantic.BaseModel):
     changeStatus: str # "new" | "same" | "changed" | "removed"
     visibility: str # "visible" | "hidden"
     diff: Optional[Dict[str, Any]] = None
-    json: Optional[Any] = None
+    json_field: Optional[Any] = pydantic.Field(None, alias='json')

 class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     """Document retrieved or processed by Firecrawl."""
@@ -116,7 +110,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
     rawHtml: Optional[str] = None
     links: Optional[List[str]] = None
     extract: Optional[T] = None
-    json: Optional[T] = None
+    json_field: Optional[T] = pydantic.Field(None, alias='json')
     screenshot: Optional[str] = None
     metadata: Optional[Any] = None
     actions: Optional[ActionsResult] = None
@@ -139,7 +133,7 @@ class WebhookConfig(pydantic.BaseModel):
 class ChangeTrackingOptions(pydantic.BaseModel):
     """Configuration for change tracking."""
     modes: Optional[List[Literal["git-diff", "json"]]] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     prompt: Optional[str] = None
     tag: Optional[str] = None

@@ -219,7 +213,7 @@ class ExtractAgent(pydantic.BaseModel):
 class JsonConfig(pydantic.BaseModel):
     """Configuration for extraction."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     agent: Optional[ExtractAgent] = None

@@ -317,7 +311,7 @@ class MapResponse(pydantic.BaseModel):
 class ExtractParams(pydantic.BaseModel):
     """Parameters for extracting information from URLs."""
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
     systemPrompt: Optional[str] = None
     allowExternalLinks: Optional[bool] = None
     enableWebSearch: Optional[bool] = None
@@ -431,7 +425,7 @@ class ExtractParams(pydantic.BaseModel):
     Parameters for the extract operation.
     """
     prompt: Optional[str] = None
-    schema: Optional[Any] = None
+    schema_field: Optional[Any] = pydantic.Field(None, alias='schema')
    system_prompt: Optional[str] = None
     allow_external_links: Optional[bool] = False
     enable_web_search: Optional[bool] = False
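The model changes above all follow one pattern: fields that previously shadowed pydantic.BaseModel attributes (json, schema) are renamed with a _field suffix and re-exposed under the original wire name through an alias, which is what makes the warnings.filterwarnings suppressions removed earlier unnecessary. A minimal sketch of the pattern, using an illustrative model name rather than one of the SDK's (pydantic v1-style API, matching the .dict() calls in this diff):

    from typing import Any, Optional
    import pydantic

    class AliasedOptions(pydantic.BaseModel):
        """Illustrative stand-in: a field literally named 'schema' would
        shadow BaseModel.schema() and trigger the suppressed warning."""
        prompt: Optional[str] = None
        # Store under a safe Python name; accept and emit 'schema' via the alias.
        schema_field: Optional[Any] = pydantic.Field(None, alias='schema')

    opts = AliasedOptions(**{'prompt': 'extract prices', 'schema': {'type': 'object'}})
    assert opts.schema_field == {'type': 'object'}                  # populated by alias
    assert 'schema_field' in opts.dict(exclude_none=True)           # default: Python name
    assert 'schema' in opts.dict(by_alias=True, exclude_none=True)  # wire name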
@@ -484,6 +478,7 @@ class FirecrawlApp:
             max_age: Optional[int] = None,
             store_in_cache: Optional[bool] = None,
             zero_data_retention: Optional[bool] = None,
+            agent: Optional[AgentOptions] = None,
             **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -508,6 +503,7 @@ class FirecrawlApp:
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
             change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
             zero_data_retention (Optional[bool]): Whether to delete data after scrape is done
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model


         Returns:
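With this change, FIRE-1 agent options become a first-class scrape_url keyword instead of something passed through **kwargs. A hedged usage sketch; the AgentOptions constructor argument shown is an assumption based on the "FIRE-1 model" docstring above, not something this diff confirms:

    from firecrawl import FirecrawlApp
    from firecrawl.firecrawl import AgentOptions  # class referenced by the new parameter

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")

    # The new branch serializes this with .dict(by_alias=True, exclude_none=True)
    # into scrape_params['agent'] before the request is sent.
    result = app.scrape_url(
        "https://example.com",
        formats=["markdown"],
        agent=AgentOptions(model="FIRE-1"),  # field name assumed
    )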
@@ -547,7 +543,7 @@ class FirecrawlApp:
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -564,22 +560,24 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if change_tracking_options:
-            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
+            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(by_alias=True, exclude_none=True)
         if max_age is not None:
             scrape_params['maxAge'] = max_age
         if store_in_cache is not None:
             scrape_params['storeInCache'] = store_in_cache
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)

         scrape_params.update(kwargs)
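Adding by_alias=True at every .dict() site, and not only where the aliased models are serialized directly, matters because the final ScrapeParams/CrawlParams/SearchParams objects nest those models and pydantic applies by_alias recursively; any call left at the default would leak schema_field/json_field into the JSON payload. A small sketch with illustrative stand-ins for the SDK models:

    from typing import Any, List, Optional
    import pydantic

    class JsonOpts(pydantic.BaseModel):       # stand-in for JsonConfig
        schema_field: Optional[Any] = pydantic.Field(None, alias='schema')

    class Params(pydantic.BaseModel):         # stand-in for ScrapeParams
        formats: Optional[List[str]] = None
        jsonOptions: Optional[JsonOpts] = None

    p = Params(formats=['markdown'],
               jsonOptions=JsonOpts(**{'schema': {'type': 'object'}}))

    # Default serialization leaks the Python-side name into the payload:
    assert p.dict(exclude_none=True)['jsonOptions'] == {'schema_field': {'type': 'object'}}

    # by_alias=True restores the key the API expects, recursively:
    assert p.dict(by_alias=True, exclude_none=True)['jsonOptions'] == {'schema': {'type': 'object'}}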
@@ -670,7 +668,7 @@ class FirecrawlApp:
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)

         # Add any additional kwargs
         search_params.update(kwargs)
@@ -678,7 +676,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"

         if _integration:
@@ -792,7 +790,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -815,7 +813,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"

@@ -921,7 +919,7 @@ class FirecrawlApp:
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -943,7 +941,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"

@@ -1242,7 +1240,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"

@@ -1354,7 +1352,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1369,16 +1367,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1389,7 +1387,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"

@@ -1495,7 +1493,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1510,16 +1508,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1530,7 +1528,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"

@@ -1631,7 +1629,7 @@ class FirecrawlApp:
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -1646,16 +1644,16 @@ class FirecrawlApp:
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
         if zero_data_retention is not None:
@@ -1666,7 +1664,7 @@ class FirecrawlApp:

         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"

@@ -2095,7 +2093,7 @@ class FirecrawlApp:
         )

         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         try:
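All of the scrape variants above pass the schema through self._ensure_schema_dict(...) before serializing, so a caller can supply either a plain JSON-schema dict or, judging by the call sites (the helper itself is not shown in this diff), a pydantic model class. A hedged usage sketch with the exported JsonConfig:

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp, JsonConfig

    class Product(BaseModel):
        name: str
        price: float

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")

    # JsonConfig stores the schema as schema_field but accepts the 'schema' alias;
    # _ensure_schema_dict is assumed to turn the model class into a JSON-schema dict,
    # and the "json" format name is assumed to pair with json_options.
    result = app.scrape_url(
        "https://example.com/item",
        formats=["json"],
        json_options=JsonConfig(prompt="Extract the product", schema=Product),
    )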
@@ -2336,10 +2334,22 @@ class FirecrawlApp:
         Exception: An exception with a message containing the status code and error details from the response.
         """
         try:
-            error_message = response.json().get('error', 'No error message provided.')
-            error_details = response.json().get('details', 'No additional error details provided.')
+            response_json = response.json()
+            error_message = response_json.get('error', 'No error message provided.')
+            error_details = response_json.get('details', 'No additional error details provided.')
         except:
-            …
+            # If we can't parse JSON, provide a helpful error message with response content
+            try:
+                response_text = response.text[:500]  # Limit to first 500 chars
+                if response_text.strip():
+                    error_message = f"Server returned non-JSON response: {response_text}"
+                    error_details = f"Full response status: {response.status_code}"
+                else:
+                    error_message = f"Server returned empty response with status {response.status_code}"
+                    error_details = "No additional details available"
+            except ValueError:
+                error_message = f"Server returned unreadable response with status {response.status_code}"
+                error_details = "No additional details available"

         message = self._get_error_message(response.status_code, action, error_message, error_details)
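The reworked handler above degrades gracefully when the server responds with HTML or an empty body instead of JSON. A stub-based sketch of the new fallback branches (StubResponse is a stand-in, not the real requests.Response):

    class StubResponse:
        """Minimal stand-in for requests.Response to exercise the fallback logic."""
        status_code = 502
        text = "<html><body>Bad Gateway</body></html>"

        def json(self):
            raise ValueError("response body is not JSON")

    response = StubResponse()
    try:
        error_message = response.json().get('error', 'No error message provided.')
    except Exception:
        response_text = response.text[:500]  # same truncation as the new code
        if response_text.strip():
            error_message = f"Server returned non-JSON response: {response_text}"
        else:
            error_message = f"Server returned empty response with status {response.status_code}"

    print(error_message)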
@@ -2362,7 +2372,7 @@ class FirecrawlApp:
         if status_code == 402:
             return f"Payment Required: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 403:
-            …
+            return f"Website Not Supported: Failed to {action}. {error_message} - {error_details}"
         elif status_code == 408:
             return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
         elif status_code == 409:
@@ -2516,7 +2526,7 @@ class FirecrawlApp:

         headers = self._prepare_headers()

-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         # Handle json options schema if present
@@ -2600,7 +2610,7 @@ class FirecrawlApp:
         method_params = {
             "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                            "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "integration"},
+                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "agent", "integration"},
             "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
             "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                           "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
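The method_params table above appears to be an allow-list used to validate keyword arguments per method, so "agent" had to be registered alongside the new parameter or validated calls would reject it. A toy sketch of that pattern; the helper below is illustrative, not the SDK's:

    ALLOWED = {
        "scrape_url": {"formats", "timeout", "json_options", "agent"},  # abridged
    }

    def validate_kwargs(method: str, kwargs: dict) -> None:
        unknown = set(kwargs) - ALLOWED[method]
        if unknown:
            raise ValueError(f"unsupported parameters for {method}: {sorted(unknown)}")

    validate_kwargs("scrape_url", {"agent": {"model": "FIRE-1"}})  # accepted after this release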
@@ -2986,6 +2996,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract: Optional[JsonConfig] = None,
             json_options: Optional[JsonConfig] = None,
             actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
+            agent: Optional[AgentOptions] = None,
             **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape a single URL asynchronously.
@@ -3008,6 +3019,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
+            agent (Optional[AgentOptions]): Agent configuration for FIRE-1 model
             **kwargs: Additional parameters to pass to the API

         Returns:
@@ -3052,7 +3064,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout:
             scrape_params['timeout'] = timeout
         if location:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3069,14 +3081,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
             scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
         if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
@@ -3180,7 +3194,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3195,22 +3209,23 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
-            …
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
+        if actions is not None:
+            scrape_params['actions'] = [action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)

         # Add any additional kwargs
         scrape_params.update(kwargs)

         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"

@@ -3319,7 +3334,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             scrape_params['timeout'] = timeout
         if location is not None:
-            scrape_params['location'] = location.dict(exclude_none=True)
+            scrape_params['location'] = location.dict(by_alias=True, exclude_none=True)
         if mobile is not None:
             scrape_params['mobile'] = mobile
         if skip_tls_verification is not None:
@@ -3334,16 +3349,16 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract = self._ensure_schema_dict(extract)
             if isinstance(extract, dict) and "schema" in extract:
                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
-            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(by_alias=True, exclude_none=True)
         if json_options is not None:
             json_options = self._ensure_schema_dict(json_options)
             if isinstance(json_options, dict) and "schema" in json_options:
                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
-            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(by_alias=True, exclude_none=True)
         if actions:
-            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
+            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(by_alias=True, exclude_none=True) for action in actions]
         if agent is not None:
-            scrape_params['agent'] = agent.dict(exclude_none=True)
+            scrape_params['agent'] = agent.dict(by_alias=True, exclude_none=True)
         if zero_data_retention is not None:
             scrape_params['zeroDataRetention'] = zero_data_retention

@@ -3352,7 +3367,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

         # Create final params object
         final_params = ScrapeParams(**scrape_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"

@@ -3460,7 +3475,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3479,7 +3494,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
         # Make request
@@ -3575,7 +3590,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if ignore_sitemap is not None:
             crawl_params['ignoreSitemap'] = ignore_sitemap
         if scrape_options is not None:
-            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            crawl_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)
         if webhook is not None:
             crawl_params['webhook'] = webhook
         if deduplicate_similar_urls is not None:
@@ -3594,7 +3609,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

         # Create final params object
         final_params = CrawlParams(**crawl_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"

@@ -3760,7 +3775,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         """
         map_params = {}
         if params:
-            map_params.update(params.dict(exclude_none=True))
+            map_params.update(params.dict(by_alias=True, exclude_none=True))

         # Add individual parameters
         if search is not None:
@@ -3778,7 +3793,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

         # Create final params object
         final_params = MapParams(**map_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"

@@ -4162,7 +4177,6 @@ class AsyncFirecrawlApp(FirecrawlApp):
             url,
             max_urls=max_urls,
             show_full_text=show_full_text,
-            cache=cache,
             experimental_stream=experimental_stream
         )
         if not response.get('success') or 'id' not in response:
@@ -4226,7 +4240,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         )

         headers = self._prepare_headers()
-        json_data = {'url': url, **params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         try:
@@ -4411,7 +4425,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

         headers = self._prepare_headers()

-        json_data = {'query': query, **research_params.dict(exclude_none=True)}
+        json_data = {'query': query, **research_params.dict(by_alias=True, exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         try:
@@ -4503,7 +4517,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if isinstance(params, dict):
             search_params.update(params)
         else:
-            search_params.update(params.dict(exclude_none=True))
+            search_params.update(params.dict(by_alias=True, exclude_none=True))

         # Add individual parameters
         if limit is not None:
@@ -4521,14 +4535,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if timeout is not None:
             search_params['timeout'] = timeout
         if scrape_options is not None:
-            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+            search_params['scrapeOptions'] = scrape_options.dict(by_alias=True, exclude_none=True)

         # Add any additional kwargs
         search_params.update(kwargs)

         # Create final params object
         final_params = SearchParams(query=query, **search_params)
-        params_dict = final_params.dict(exclude_none=True)
+        params_dict = final_params.dict(by_alias=True, exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"

         return await self._async_post_request(
{firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
-firecrawl/__init__.py,sha256=…
-firecrawl/firecrawl.py,sha256=…
+firecrawl/__init__.py,sha256=M6X3Rk7QgbwslQRTX4yMw4TRAVtaD-11pQwfkleI3oc,2613
+firecrawl/firecrawl.py,sha256=RXURxzCDbKJ17KVzXwbo6Iy72c9Q1OgjtwRCYaphNwU,200263
 firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
 firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=k9IsEbdTHL9Cu49M4FpnQDEo2rnG6RqwmZAsK_EVJr4,21069
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
-firecrawl-2.16.3.dist-info/LICENSE,sha256=…
-firecrawl-2.16.3.dist-info/METADATA,sha256=…
-firecrawl-2.16.3.dist-info/WHEEL,sha256=…
-firecrawl-2.16.3.dist-info/top_level.txt,sha256=…
-firecrawl-2.16.3.dist-info/RECORD,,
+firecrawl-2.16.5.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+firecrawl-2.16.5.dist-info/METADATA,sha256=kaGelYV72SrE23PK8xFBPcyBRAOMylHCQePSlFZO5B0,7166
+firecrawl-2.16.5.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+firecrawl-2.16.5.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl-2.16.5.dist-info/RECORD,,

{firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/LICENSE
File without changes

{firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/WHEEL
File without changes

{firecrawl-2.16.3.dist-info → firecrawl-2.16.5.dist-info}/top_level.txt
File without changes