firecrawl 2.13.0__py3-none-any.whl → 2.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic.

firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.13.0"
+__version__ = "2.14.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py CHANGED
@@ -23,6 +23,8 @@ import websockets
 import aiohttp
 import asyncio
 from pydantic import Field
+import ssl
+import certifi
 
 # Suppress Pydantic warnings about attribute shadowing
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
@@ -482,6 +484,7 @@ class FirecrawlApp:
         change_tracking_options: Optional[ChangeTrackingOptions] = None,
         max_age: Optional[int] = None,
         store_in_cache: Optional[bool] = None,
+        zero_data_retention: Optional[bool] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -504,6 +507,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
             change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
+            zero_data_retention (Optional[bool]): Whether to delete data after scrape is done
 
 
         Returns:
@@ -569,6 +573,8 @@ class FirecrawlApp:
             scrape_params['maxAge'] = max_age
         if store_in_cache is not None:
             scrape_params['storeInCache'] = store_in_cache
+        if zero_data_retention is not None:
+            scrape_params['zeroDataRetention'] = zero_data_retention
 
         scrape_params.update(kwargs)
 
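The new flag is forwarded to the API as zeroDataRetention in the request body. A minimal usage sketch (the API key and target URL are placeholders):

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    # Ask the service not to retain the scraped data once the job completes;
    # leaving the flag unset keeps the account's default behavior.
    result = app.scrape_url(
        "https://example.com",
        formats=["markdown"],
        zero_data_retention=True,
    )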
@@ -663,12 +669,16 @@ class FirecrawlApp:
 
         # Add any additional kwargs
         search_params.update(kwargs)
+        _integration = search_params.get('integration')
 
         # Create final params object
         final_params = SearchParams(query=query, **search_params)
         params_dict = final_params.dict(exclude_none=True)
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if _integration:
+            params_dict['integration'] = _integration
+
         # Make request
         response = requests.post(
             f"{self.api_url}/v1/search",
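The stash-and-restore around _integration exists because SearchParams, a Pydantic model, does not appear to declare an integration field, so the value would be dropped when final_params.dict(exclude_none=True) serializes the payload; re-adding it afterwards preserves the tag. A usage sketch (the tag value is illustrative, not an official identifier):

    # Reusing the app instance from the sketch above.
    results = app.search(
        "firecrawl python sdk",
        limit=5,
        integration="my-framework",  # hypothetical integration tag
    )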
@@ -711,6 +721,7 @@ class FirecrawlApp:
         delay: Optional[int] = None,
         allow_subdomains: Optional[bool] = None,
         max_concurrency: Optional[int] = None,
+        zero_data_retention: Optional[bool] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -737,6 +748,7 @@ class FirecrawlApp:
             delay (Optional[int]): Delay in seconds between scrapes
             allow_subdomains (Optional[bool]): Follow subdomains
             max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -790,9 +802,11 @@ class FirecrawlApp:
             crawl_params['allowSubdomains'] = allow_subdomains
         if max_concurrency is not None:
             crawl_params['maxConcurrency'] = max_concurrency
-
+        if zero_data_retention is not None:
+            crawl_params['zeroDataRetention'] = zero_data_retention
         # Add any additional kwargs
         crawl_params.update(kwargs)
+        _integration = crawl_params.get('integration')
 
         # Create final params object
         final_params = CrawlParams(**crawl_params)
@@ -800,6 +814,9 @@ class FirecrawlApp:
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if _integration:
+            params_dict['integration'] = _integration
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/crawl', params_dict, headers)
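crawl_url picks up both behaviors: the zeroDataRetention flag and the same integration passthrough. A combined sketch (both values are placeholders):

    # Kick off a crawl with retention disabled and an origin tag attached.
    crawl_result = app.crawl_url(
        "https://example.com",
        limit=10,
        zero_data_retention=True,
        integration="my-framework",  # hypothetical integration tag
    )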
@@ -834,6 +851,7 @@ class FirecrawlApp:
         delay: Optional[int] = None,
         allow_subdomains: Optional[bool] = None,
         max_concurrency: Optional[int] = None,
+        zero_data_retention: Optional[bool] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> CrawlResponse:
@@ -859,6 +877,7 @@ class FirecrawlApp:
             delay (Optional[int]): Delay in seconds between scrapes
             allow_subdomains (Optional[bool]): Follow subdomains
             max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -912,7 +931,8 @@ class FirecrawlApp:
             crawl_params['allowSubdomains'] = allow_subdomains
         if max_concurrency is not None:
             crawl_params['maxConcurrency'] = max_concurrency
-
+        if zero_data_retention is not None:
+            crawl_params['zeroDataRetention'] = zero_data_retention
         # Add any additional kwargs
         crawl_params.update(kwargs)
 
@@ -1092,6 +1112,7 @@ class FirecrawlApp:
         delay: Optional[int] = None,
         allow_subdomains: Optional[bool] = None,
         max_concurrency: Optional[int] = None,
+        zero_data_retention: Optional[bool] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1117,6 +1138,7 @@ class FirecrawlApp:
             delay (Optional[int]): Delay in seconds between scrapes
             allow_subdomains (Optional[bool]): Follow subdomains
             max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1144,6 +1166,7 @@ class FirecrawlApp:
             delay=delay,
             allow_subdomains=allow_subdomains,
             max_concurrency=max_concurrency,
+            zero_data_retention=zero_data_retention,
             idempotency_key=idempotency_key,
             **kwargs
         )
@@ -1210,6 +1233,7 @@ class FirecrawlApp:
 
         # Add any additional kwargs
         map_params.update(kwargs)
+        _integration = map_params.get('integration')
 
         # Create final params object
         final_params = MapParams(**map_params)
@@ -1217,6 +1241,9 @@ class FirecrawlApp:
         params_dict['url'] = url
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if _integration:
+            params_dict['integration'] = _integration
+
         # Make request
         response = requests.post(
             f"{self.api_url}/v1/map",
@@ -1261,6 +1288,7 @@ class FirecrawlApp:
         agent: Optional[AgentOptions] = None,
         poll_interval: Optional[int] = 2,
         max_concurrency: Optional[int] = None,
+        zero_data_retention: Optional[bool] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeStatusResponse:
@@ -1348,6 +1376,8 @@ class FirecrawlApp:
             scrape_params['agent'] = agent.dict(exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
+        if zero_data_retention is not None:
+            scrape_params['zeroDataRetention'] = zero_data_retention
 
         # Add any additional kwargs
         scrape_params.update(kwargs)
@@ -1399,6 +1429,7 @@ class FirecrawlApp:
         agent: Optional[AgentOptions] = None,
         max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
+        zero_data_retention: Optional[bool] = None,
         **kwargs
     ) -> BatchScrapeResponse:
         """
@@ -1424,6 +1455,7 @@ class FirecrawlApp:
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
             max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1485,6 +1517,8 @@ class FirecrawlApp:
             scrape_params['agent'] = agent.dict(exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
+        if zero_data_retention is not None:
+            scrape_params['zeroDataRetention'] = zero_data_retention
 
         # Add any additional kwargs
         scrape_params.update(kwargs)
@@ -1534,6 +1568,7 @@ class FirecrawlApp:
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
         agent: Optional[AgentOptions] = None,
         max_concurrency: Optional[int] = None,
+        zero_data_retention: Optional[bool] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1560,6 +1595,7 @@ class FirecrawlApp:
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
             max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1617,6 +1653,8 @@ class FirecrawlApp:
             scrape_params['agent'] = agent.dict(exclude_none=True)
         if max_concurrency is not None:
             scrape_params['maxConcurrency'] = max_concurrency
+        if zero_data_retention is not None:
+            scrape_params['zeroDataRetention'] = zero_data_retention
 
         # Add any additional kwargs
         scrape_params.update(kwargs)
@@ -1749,7 +1787,8 @@ class FirecrawlApp:
         allow_external_links: Optional[bool] = False,
         enable_web_search: Optional[bool] = False,
         show_sources: Optional[bool] = False,
-        agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
+        agent: Optional[Dict[str, Any]] = None,
+        **kwargs) -> ExtractResponse[Any]:
         """
         Extract structured information from URLs.
 
@@ -1762,6 +1801,7 @@ class FirecrawlApp:
             enable_web_search (Optional[bool]): Enable web search
             show_sources (Optional[bool]): Include source URLs
             agent (Optional[Dict[str, Any]]): Agent configuration
+            **kwargs: Additional parameters to pass to the API
 
         Returns:
             ExtractResponse[Any] with:
@@ -1772,6 +1812,9 @@ class FirecrawlApp:
         Raises:
             ValueError: If prompt/schema missing or extraction fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "extract")
+
         headers = self._prepare_headers()
 
         if not prompt and not schema:
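extract now routes its extra kwargs through _validate_kwargs before building the request. The helper itself is unchanged and not shown in this diff; below is a minimal sketch of the kind of whitelist check it presumably performs, using the per-method sets updated in a later hunk (the exception type and message are assumptions):

    # Trimmed per-method whitelist; the full sets appear in a later hunk.
    METHOD_PARAMS = {
        "extract": {"prompt", "schema", "system_prompt", "allow_external_links",
                    "enable_web_search", "show_sources", "agent", "integration"},
    }

    def validate_kwargs(kwargs: dict, method_name: str) -> None:
        # Hypothetical reconstruction of the SDK's internal check: reject any
        # extra keyword argument not whitelisted for the calling method.
        unknown = set(kwargs) - METHOD_PARAMS.get(method_name, set())
        if unknown:
            raise ValueError(
                f"Unsupported parameter(s) for {method_name}: "
                + ", ".join(sorted(unknown))
            )

    validate_kwargs({"integration": "my-framework"}, "extract")  # passes
    # validate_kwargs({"bogus": 1}, "extract") would raise ValueError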
@@ -1801,6 +1844,9 @@ class FirecrawlApp:
         if agent:
             request_data['agent'] = agent
 
+        # Add any additional kwargs
+        request_data.update(kwargs)
+
         try:
             # Send the initial extract request
             response = self._post_request(
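With the kwargs plumbing in place, extract can carry extra fields end to end; per the whitelist updated in the next hunk, integration is the newly accepted extra. A sketch, assuming the urls-plus-prompt calling convention (the tag value is a placeholder):

    # Extra keyword arguments are validated against the "extract" whitelist
    # and then merged into the request body.
    data = app.extract(
        ["https://example.com"],
        prompt="List the product names on the page.",
        integration="my-framework",  # hypothetical integration tag
    )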
@@ -2549,12 +2595,13 @@ class FirecrawlApp:
         method_params = {
             "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                            "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
-            "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
+                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "integration"},
+            "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
             "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                           "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
-                          "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url"},
-            "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout"},
+                          "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url", "integration"},
+            "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout", "integration"},
+            "extract": {"prompt", "schema", "system_prompt", "allow_external_links", "enable_web_search", "show_sources", "agent", "integration"},
             "batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
                                   "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
                                   "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
@@ -2722,7 +2769,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             aiohttp.ClientError: If the request fails after all retries.
             Exception: If max retries are exceeded or other errors occur.
         """
-        async with aiohttp.ClientSession() as session:
+        ssl_context = ssl.create_default_context(cafile=certifi.where())
+        async with aiohttp.ClientSession(ssl=ssl_context) as session:
             for attempt in range(retries):
                 try:
                     async with session.request(
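Building the TLS context from certifi's CA bundle makes certificate verification independent of the host's system store, a common fix for Python installs (notably on macOS) that ship without usable root certificates. A standalone sketch of the same idea; here the context is attached via a TCPConnector, which applies it to every request the session makes:

    import asyncio
    import ssl

    import aiohttp
    import certifi

    async def fetch(url: str) -> str:
        # Trust certifi's curated CA bundle rather than the system store.
        ssl_context = ssl.create_default_context(cafile=certifi.where())
        connector = aiohttp.TCPConnector(ssl=ssl_context)
        async with aiohttp.ClientSession(connector=connector) as session:
            async with session.get(url) as resp:
                return await resp.text()

    print(asyncio.run(fetch("https://example.com")))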
@@ -3204,6 +3252,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
         agent: Optional[AgentOptions] = None,
+        zero_data_retention: Optional[bool] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeResponse:
@@ -3229,6 +3278,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -3288,7 +3338,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if zero_data_retention is not None:
+            scrape_params['zeroDataRetention'] = zero_data_retention
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
{firecrawl-2.13.0.dist-info → firecrawl-2.14.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl
-Version: 2.13.0
+Version: 2.14.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
{firecrawl-2.13.0.dist-info → firecrawl-2.14.0.dist-info}/RECORD RENAMED
@@ -1,12 +1,12 @@
-firecrawl/__init__.py,sha256=wIp44hLUBzFYK-Hl85v0FBvjRAg19jMtB_ZbkYCOsKc,2613
-firecrawl/firecrawl.py,sha256=HRDjrYMG4y3k5b_hPKFcilIF4yGGYsfpR34a1AzHQoA,195401
+firecrawl/__init__.py,sha256=z0tkSJXprkOwIOZhdp3MXZYCX_3FV8_eitdvC2A4tmo,2613
+firecrawl/firecrawl.py,sha256=3JydYwW5sYG-4Qf4baQIiFQhHSh2gI_hXoV8nJ2LylU,198259
 firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
 firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=k9IsEbdTHL9Cu49M4FpnQDEo2rnG6RqwmZAsK_EVJr4,21069
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
-firecrawl-2.13.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
-firecrawl-2.13.0.dist-info/METADATA,sha256=KjXkz2Xr1OXCiMElT5ZB7xXjoqYNVOCNix9MSNWWZMo,7166
-firecrawl-2.13.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-firecrawl-2.13.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
-firecrawl-2.13.0.dist-info/RECORD,,
+firecrawl-2.14.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+firecrawl-2.14.0.dist-info/METADATA,sha256=jY-cqG4nfHGJ9U0_tTbhuiGLlO47l1z9ZLLNUdXRqlY,7166
+firecrawl-2.14.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+firecrawl-2.14.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl-2.14.0.dist-info/RECORD,,