firecrawl-py 2.8.0.tar.gz → 2.9.0.tar.gz

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl-py might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl-py
-Version: 2.8.0
+Version: 2.9.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.8.0"
+__version__ = "2.9.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -263,6 +263,7 @@ class CrawlParams(pydantic.BaseModel):
     ignoreQueryParameters: Optional[bool] = None
     regexOnFullURL: Optional[bool] = None
     delay: Optional[int] = None # Delay in seconds between scrapes
+    maxConcurrency: Optional[int] = None
 
 class CrawlResponse(pydantic.BaseModel):
     """Response from crawling operations."""
@@ -686,6 +687,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -694,6 +696,7 @@ class FirecrawlApp:
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
         delay: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -708,7 +711,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -717,6 +721,7 @@ class FirecrawlApp:
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
             delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -746,7 +751,9 @@ class FirecrawlApp:
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -764,7 +771,9 @@ class FirecrawlApp:
             crawl_params['regexOnFullURL'] = regex_on_full_url
         if delay is not None:
             crawl_params['delay'] = delay
-
+        if max_concurrency is not None:
+            crawl_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         crawl_params.update(kwargs)
 
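Taken together, these hunks keep old callers working while steering new code to the replacement flag: crawl_entire_domain is checked first, and allow_backward_links is only consulted when the new flag is None. A hedged usage sketch, assuming the method shown is FirecrawlApp.crawl_url (the docstrings above match it) and using a placeholder API key:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

# crawl_entire_domain maps to crawlEntireDomain in the request body;
# if both flags were passed, the elif above means allow_backward_links
# would be ignored in favor of the new name.
result = app.crawl_url(
    "https://example.com",
    limit=50,
    crawl_entire_domain=True,  # replaces deprecated allow_backward_links
    max_concurrency=5,         # new in 2.9.0
    delay=1,                   # seconds between scrapes
)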
@@ -797,6 +806,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -818,7 +828,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -826,6 +837,8 @@ class FirecrawlApp:
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -855,7 +868,9 @@ class FirecrawlApp:
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -873,7 +888,9 @@ class FirecrawlApp:
             crawl_params['regexOnFullURL'] = regex_on_full_url
         if delay is not None:
             crawl_params['delay'] = delay
-
+        if max_concurrency is not None:
+            crawl_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         crawl_params.update(kwargs)
 
@@ -1042,6 +1059,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -1049,6 +1067,8 @@ class FirecrawlApp:
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1062,7 +1082,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -1070,6 +1091,8 @@ class FirecrawlApp:
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1094,6 +1117,8 @@ class FirecrawlApp:
             deduplicate_similar_urls=deduplicate_similar_urls,
             ignore_query_parameters=ignore_query_parameters,
             regex_on_full_url=regex_on_full_url,
+            delay=delay,
+            max_concurrency=max_concurrency,
             idempotency_key=idempotency_key,
             **kwargs
         )
@@ -1210,6 +1235,7 @@ class FirecrawlApp:
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
         poll_interval: Optional[int] = 2,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeStatusResponse:
@@ -1235,6 +1261,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -1294,7 +1321,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
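The batch scrape path gets the same guard, so maxConcurrency is only added to the request when explicitly set and a 2.8.0-style call produces an identical body. A sketch under the assumption that the method shown is FirecrawlApp.batch_scrape_urls (the BatchScrapeStatusResponse return type and poll_interval parameter suggest the blocking variant):

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

status = app.batch_scrape_urls(
    ["https://example.com/a", "https://example.com/b"],
    formats=["markdown"],  # assumed scrape option; not part of this diff
    max_concurrency=2,     # new in 2.9.0: forwarded as scrape_params['maxConcurrency']
)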
@@ -1343,6 +1372,7 @@ class FirecrawlApp:
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeResponse:
@@ -1368,6 +1398,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1427,7 +1458,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
@@ -1475,6 +1508,7 @@ class FirecrawlApp:
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1500,6 +1534,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1555,7 +1590,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
@@ -2784,7 +2821,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             * limit - Maximum pages to crawl
 
         Link Following:
-            * allowBackwardLinks - Follow parent directory links
+            * allowBackwardLinks - DEPRECATED: Use crawlEntireDomain instead
+            * crawlEntireDomain - Follow parent directory links
             * allowExternalLinks - Follow external domain links
             * ignoreSitemap - Skip sitemap.xml processing
 
@@ -3263,6 +3301,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -3285,7 +3324,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3323,7 +3363,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -3375,6 +3417,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -3397,7 +3440,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3431,7 +3475,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
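
AsyncFirecrawlApp receives the identical precedence logic, so the deprecation behaves the same under asyncio. A minimal sketch, assuming the async class exposes an awaitable crawl_url mirroring the sync signature:

import asyncio
from firecrawl import AsyncFirecrawlApp

async def main():
    app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    # Same elif fallback as the sync path: crawl_entire_domain wins
    # whenever it is not None.
    result = await app.crawl_url(
        "https://example.com",
        crawl_entire_domain=True,
        max_concurrency=5,
    )

asyncio.run(main())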
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl-py
-Version: 2.8.0
+Version: 2.9.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
4 files without changes