firecrawl 2.13.0.tar.gz → 2.15.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.13.0
+ Version: 2.15.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa

- __version__ = "2.13.0"
+ __version__ = "2.15.0"

  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -482,6 +482,7 @@ class FirecrawlApp:
  change_tracking_options: Optional[ChangeTrackingOptions] = None,
  max_age: Optional[int] = None,
  store_in_cache: Optional[bool] = None,
+ zero_data_retention: Optional[bool] = None,
  **kwargs) -> ScrapeResponse[Any]:
  """
  Scrape and extract content from a URL.
@@ -504,6 +505,7 @@ class FirecrawlApp:
  json_options (Optional[JsonConfig]): JSON extraction settings
  actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
  change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
+ zero_data_retention (Optional[bool]): Whether to delete data after scrape is done

  Returns:
@@ -569,6 +571,8 @@ class FirecrawlApp:
  scrape_params['maxAge'] = max_age
  if store_in_cache is not None:
  scrape_params['storeInCache'] = store_in_cache
+ if zero_data_retention is not None:
+ scrape_params['zeroDataRetention'] = zero_data_retention

  scrape_params.update(kwargs)
@@ -663,12 +667,16 @@ class FirecrawlApp:
  # Add any additional kwargs
  search_params.update(kwargs)
+ _integration = search_params.get('integration')

  # Create final params object
  final_params = SearchParams(query=query, **search_params)
  params_dict = final_params.dict(exclude_none=True)
  params_dict['origin'] = f"python-sdk@{version}"

+ if _integration:
+ params_dict['integration'] = _integration
+
  # Make request
  response = requests.post(
  f"{self.api_url}/v1/search",
@@ -711,6 +719,7 @@ class FirecrawlApp:
  delay: Optional[int] = None,
  allow_subdomains: Optional[bool] = None,
  max_concurrency: Optional[int] = None,
+ zero_data_retention: Optional[bool] = None,
  poll_interval: Optional[int] = 2,
  idempotency_key: Optional[str] = None,
  **kwargs
@@ -737,6 +746,7 @@ class FirecrawlApp:
  delay (Optional[int]): Delay in seconds between scrapes
  allow_subdomains (Optional[bool]): Follow subdomains
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  poll_interval (Optional[int]): Seconds between status checks (default: 2)
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -790,9 +800,11 @@ class FirecrawlApp:
  crawl_params['allowSubdomains'] = allow_subdomains
  if max_concurrency is not None:
  crawl_params['maxConcurrency'] = max_concurrency
-
+ if zero_data_retention is not None:
+ crawl_params['zeroDataRetention'] = zero_data_retention
  # Add any additional kwargs
  crawl_params.update(kwargs)
+ _integration = crawl_params.get('integration')

  # Create final params object
  final_params = CrawlParams(**crawl_params)
@@ -800,6 +812,9 @@ class FirecrawlApp:
  params_dict['url'] = url
  params_dict['origin'] = f"python-sdk@{version}"

+ if _integration:
+ params_dict['integration'] = _integration
+
  # Make request
  headers = self._prepare_headers(idempotency_key)
  response = self._post_request(f'{self.api_url}/v1/crawl', params_dict, headers)
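crawl_url gets the same treatment: zero_data_retention is mapped to zeroDataRetention, and an integration tag supplied through **kwargs survives the CrawlParams round trip. A sketch under the same placeholder assumptions as the earlier examples:

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key
    crawl_result = app.crawl_url(
        "https://example.com",
        limit=10,
        zero_data_retention=True,    # sent as 'zeroDataRetention'
        integration="my-pipeline",   # hypothetical tag, passed through **kwargs
    )
    print(crawl_result.status)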
@@ -834,6 +849,7 @@ class FirecrawlApp:
  delay: Optional[int] = None,
  allow_subdomains: Optional[bool] = None,
  max_concurrency: Optional[int] = None,
+ zero_data_retention: Optional[bool] = None,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> CrawlResponse:
@@ -859,6 +875,7 @@ class FirecrawlApp:
  delay (Optional[int]): Delay in seconds between scrapes
  allow_subdomains (Optional[bool]): Follow subdomains
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -912,7 +929,8 @@ class FirecrawlApp:
  crawl_params['allowSubdomains'] = allow_subdomains
  if max_concurrency is not None:
  crawl_params['maxConcurrency'] = max_concurrency
-
+ if zero_data_retention is not None:
+ crawl_params['zeroDataRetention'] = zero_data_retention
  # Add any additional kwargs
  crawl_params.update(kwargs)
@@ -1092,6 +1110,7 @@ class FirecrawlApp:
  delay: Optional[int] = None,
  allow_subdomains: Optional[bool] = None,
  max_concurrency: Optional[int] = None,
+ zero_data_retention: Optional[bool] = None,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> 'CrawlWatcher':
@@ -1117,6 +1136,7 @@ class FirecrawlApp:
  delay (Optional[int]): Delay in seconds between scrapes
  allow_subdomains (Optional[bool]): Follow subdomains
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -1144,6 +1164,7 @@ class FirecrawlApp:
  delay=delay,
  allow_subdomains=allow_subdomains,
  max_concurrency=max_concurrency,
+ zero_data_retention=zero_data_retention,
  idempotency_key=idempotency_key,
  **kwargs
  )
@@ -1210,6 +1231,7 @@ class FirecrawlApp:
  # Add any additional kwargs
  map_params.update(kwargs)
+ _integration = map_params.get('integration')

  # Create final params object
  final_params = MapParams(**map_params)
@@ -1217,6 +1239,9 @@ class FirecrawlApp:
  params_dict['url'] = url
  params_dict['origin'] = f"python-sdk@{version}"

+ if _integration:
+ params_dict['integration'] = _integration
+
  # Make request
  response = requests.post(
  f"{self.api_url}/v1/map",
@@ -1261,6 +1286,7 @@ class FirecrawlApp:
  agent: Optional[AgentOptions] = None,
  poll_interval: Optional[int] = 2,
  max_concurrency: Optional[int] = None,
+ zero_data_retention: Optional[bool] = None,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> BatchScrapeStatusResponse:
@@ -1348,6 +1374,8 @@ class FirecrawlApp:
  scrape_params['agent'] = agent.dict(exclude_none=True)
  if max_concurrency is not None:
  scrape_params['maxConcurrency'] = max_concurrency
+ if zero_data_retention is not None:
+ scrape_params['zeroDataRetention'] = zero_data_retention

  # Add any additional kwargs
  scrape_params.update(kwargs)
@@ -1399,6 +1427,7 @@ class FirecrawlApp:
  agent: Optional[AgentOptions] = None,
  max_concurrency: Optional[int] = None,
  idempotency_key: Optional[str] = None,
+ zero_data_retention: Optional[bool] = None,
  **kwargs
  ) -> BatchScrapeResponse:
  """
@@ -1424,6 +1453,7 @@ class FirecrawlApp:
  actions (Optional[List[Union]]): Actions to perform
  agent (Optional[AgentOptions]): Agent configuration
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -1485,6 +1515,8 @@ class FirecrawlApp:
  scrape_params['agent'] = agent.dict(exclude_none=True)
  if max_concurrency is not None:
  scrape_params['maxConcurrency'] = max_concurrency
+ if zero_data_retention is not None:
+ scrape_params['zeroDataRetention'] = zero_data_retention

  # Add any additional kwargs
  scrape_params.update(kwargs)
@@ -1534,6 +1566,7 @@ class FirecrawlApp:
  actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
  agent: Optional[AgentOptions] = None,
  max_concurrency: Optional[int] = None,
+ zero_data_retention: Optional[bool] = None,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> 'CrawlWatcher':
@@ -1560,6 +1593,7 @@ class FirecrawlApp:
  actions (Optional[List[Union]]): Actions to perform
  agent (Optional[AgentOptions]): Agent configuration
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -1617,6 +1651,8 @@ class FirecrawlApp:
  scrape_params['agent'] = agent.dict(exclude_none=True)
  if max_concurrency is not None:
  scrape_params['maxConcurrency'] = max_concurrency
+ if zero_data_retention is not None:
+ scrape_params['zeroDataRetention'] = zero_data_retention

  # Add any additional kwargs
  scrape_params.update(kwargs)
@@ -1749,7 +1785,8 @@ class FirecrawlApp:
  allow_external_links: Optional[bool] = False,
  enable_web_search: Optional[bool] = False,
  show_sources: Optional[bool] = False,
- agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
+ agent: Optional[Dict[str, Any]] = None,
+ **kwargs) -> ExtractResponse[Any]:
  """
  Extract structured information from URLs.
@@ -1762,6 +1799,7 @@ class FirecrawlApp:
  enable_web_search (Optional[bool]): Enable web search
  show_sources (Optional[bool]): Include source URLs
  agent (Optional[Dict[str, Any]]): Agent configuration
+ **kwargs: Additional parameters to pass to the API

  Returns:
  ExtractResponse[Any] with:
@@ -1772,6 +1810,9 @@ class FirecrawlApp:
  Raises:
  ValueError: If prompt/schema missing or extraction fails
  """
+ # Validate any additional kwargs
+ self._validate_kwargs(kwargs, "extract")
+
  headers = self._prepare_headers()

  if not prompt and not schema:
@@ -1801,6 +1842,9 @@ class FirecrawlApp:
  if agent:
  request_data['agent'] = agent

+ # Add any additional kwargs
+ request_data.update(kwargs)
+
  try:
  # Send the initial extract request
  response = self._post_request(
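extract now takes arbitrary **kwargs, checks them against the new "extract" entry in the _validate_kwargs whitelist (shown in the next hunk), and merges whatever passes into the request body. A hedged sketch; the exact call shape of extract and the tag value are assumptions for illustration:

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key
    data = app.extract(
        ["https://example.com"],
        prompt="Extract the page title and a one-sentence summary.",
        integration="my-pipeline",  # extra kwarg; kwargs outside the 'extract' whitelist are rejected
    )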
@@ -2549,12 +2593,13 @@ class FirecrawlApp:
  method_params = {
  "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
  "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
- "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
- "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
+ "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "integration"},
+ "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
  "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
  "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
- "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url"},
- "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout"},
+ "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url", "integration"},
+ "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout", "integration"},
+ "extract": {"prompt", "schema", "system_prompt", "allow_external_links", "enable_web_search", "show_sources", "agent", "integration"},
  "batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
  "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
  "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
@@ -3204,6 +3249,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  json_options: Optional[JsonConfig] = None,
  actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
  agent: Optional[AgentOptions] = None,
+ zero_data_retention: Optional[bool] = None,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> BatchScrapeResponse:
@@ -3229,6 +3275,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  json_options (Optional[JsonConfig]): JSON extraction config
  actions (Optional[List[Union]]): Actions to perform
  agent (Optional[AgentOptions]): Agent configuration
+ zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
  **kwargs: Additional parameters to pass to the API
@@ -3288,7 +3335,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
  scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
  if agent is not None:
  scrape_params['agent'] = agent.dict(exclude_none=True)
-
+ if zero_data_retention is not None:
+ scrape_params['zeroDataRetention'] = zero_data_retention
+
  # Add any additional kwargs
  scrape_params.update(kwargs)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.13.0
+ Version: 2.15.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
File without changes
File without changes
File without changes
File without changes
File without changes