firecrawl 2.7.1.tar.gz → 2.9.0.tar.gz

This diff shows the changes between publicly released versions of the package, as published to their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of firecrawl has been flagged as potentially problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl
-Version: 2.7.1
+Version: 2.9.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.7.1"
+__version__ = "2.9.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -140,6 +140,7 @@ class ChangeTrackingOptions(pydantic.BaseModel):
     modes: Optional[List[Literal["git-diff", "json"]]] = None
     schema: Optional[Any] = None
     prompt: Optional[str] = None
+    tag: Optional[str] = None
 
 class ScrapeOptions(pydantic.BaseModel):
     """Parameters for scraping operations."""
@@ -157,6 +158,8 @@ class ScrapeOptions(pydantic.BaseModel):
     blockAds: Optional[bool] = None
     proxy: Optional[Literal["basic", "stealth", "auto"]] = None
     changeTrackingOptions: Optional[ChangeTrackingOptions] = None
+    maxAge: Optional[int] = None
+    storeInCache: Optional[bool] = None
 
 class WaitAction(pydantic.BaseModel):
    """Wait action to perform during scraping."""
@@ -260,6 +263,7 @@ class CrawlParams(pydantic.BaseModel):
     ignoreQueryParameters: Optional[bool] = None
     regexOnFullURL: Optional[bool] = None
     delay: Optional[int] = None # Delay in seconds between scrapes
+    maxConcurrency: Optional[int] = None
 
 class CrawlResponse(pydantic.BaseModel):
     """Response from crawling operations."""
@@ -292,6 +296,7 @@ class MapParams(pydantic.BaseModel):
     sitemapOnly: Optional[bool] = None
     limit: Optional[int] = None
     timeout: Optional[int] = None
+    useIndex: Optional[bool] = None
 
 class MapResponse(pydantic.BaseModel):
     """Response from mapping operations."""
@@ -464,6 +469,8 @@ class FirecrawlApp:
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         change_tracking_options: Optional[ChangeTrackingOptions] = None,
+        max_age: Optional[int] = None,
+        store_in_cache: Optional[bool] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -545,6 +552,10 @@ class FirecrawlApp:
             scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
         if change_tracking_options:
             scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
+        if max_age is not None:
+            scrape_params['maxAge'] = max_age
+        if store_in_cache is not None:
+            scrape_params['storeInCache'] = store_in_cache
 
         scrape_params.update(kwargs)
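Per the two hunks above, scrape_url now exposes the cache knobs as snake_case keyword arguments and maps them to the camelCase API fields. A hedged usage sketch with placeholder values (maxAge assumed to be milliseconds):

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

doc = app.scrape_url(
    "https://example.com",
    formats=["markdown"],
    max_age=3_600_000,     # sent as maxAge; tolerate a cached copy up to ~1 hour old (assumed ms)
    store_in_cache=False,  # sent as storeInCache; skip writing this result to the cache
)
print(doc.markdown)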
@@ -676,6 +687,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -684,6 +696,7 @@ class FirecrawlApp:
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
         delay: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -698,7 +711,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -707,6 +721,7 @@ class FirecrawlApp:
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
             delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -736,7 +751,9 @@ class FirecrawlApp:
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -754,7 +771,9 @@ class FirecrawlApp:
             crawl_params['regexOnFullURL'] = regex_on_full_url
         if delay is not None:
             crawl_params['delay'] = delay
-
+        if max_concurrency is not None:
+            crawl_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         crawl_params.update(kwargs)
 
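Note the precedence in the if/elif above: when crawl_entire_domain is set, crawlEntireDomain is sent and the deprecated allow_backward_links is ignored; the old flag is only forwarded when the new one is unset. A sketch of a crawl using the new parameters, with placeholder values:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

status = app.crawl_url(
    "https://example.com/blog/some-post",  # placeholder starting URL
    crawl_entire_domain=True,  # replaces the deprecated allow_backward_links
    max_concurrency=5,         # sent as maxConcurrency; cap on parallel scrapes
    limit=20,
)
print(status.status, status.completed, status.total)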
@@ -787,6 +806,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -808,7 +828,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -816,6 +837,8 @@ class FirecrawlApp:
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -845,7 +868,9 @@ class FirecrawlApp:
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -863,7 +888,9 @@ class FirecrawlApp:
             crawl_params['regexOnFullURL'] = regex_on_full_url
         if delay is not None:
             crawl_params['delay'] = delay
-
+        if max_concurrency is not None:
+            crawl_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         crawl_params.update(kwargs)
 
@@ -1032,6 +1059,7 @@ class FirecrawlApp:
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -1039,6 +1067,8 @@ class FirecrawlApp:
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1052,7 +1082,8 @@ class FirecrawlApp:
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -1060,6 +1091,8 @@ class FirecrawlApp:
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1084,6 +1117,8 @@ class FirecrawlApp:
             deduplicate_similar_urls=deduplicate_similar_urls,
             ignore_query_parameters=ignore_query_parameters,
             regex_on_full_url=regex_on_full_url,
+            delay=delay,
+            max_concurrency=max_concurrency,
             idempotency_key=idempotency_key,
             **kwargs
         )
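The watch variant simply forwards the new throttling knobs to the underlying crawl. A sketch of obtaining a watcher with them set, using placeholder values; the returned CrawlWatcher streams progress events as before:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

watcher = app.crawl_url_and_watch(
    "https://example.com",
    limit=10,
    delay=1,            # seconds between scrapes, now forwarded to the crawl
    max_concurrency=4,  # now forwarded as maxConcurrency
)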
@@ -1102,6 +1137,7 @@ class FirecrawlApp:
         sitemap_only: Optional[bool] = None,
         limit: Optional[int] = None,
         timeout: Optional[int] = None,
+        use_index: Optional[bool] = None,
         **kwargs) -> MapResponse:
         """
         Map and discover links from a URL.
@@ -1144,7 +1180,9 @@ class FirecrawlApp:
             map_params['limit'] = limit
         if timeout is not None:
             map_params['timeout'] = timeout
-
+        if use_index is not None:
+            map_params['useIndex'] = use_index
+
         # Add any additional kwargs
         map_params.update(kwargs)
 
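map_url gains use_index, forwarded as the useIndex API field, presumably toggling use of Firecrawl's URL index during mapping. A sketch with placeholder values:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

res = app.map_url(
    "https://example.com",
    use_index=True,  # sent as useIndex
    limit=100,
)
print(res.links)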
@@ -1197,6 +1235,7 @@ class FirecrawlApp:
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
         poll_interval: Optional[int] = 2,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeStatusResponse:
@@ -1222,6 +1261,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -1281,7 +1321,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
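For batches, the same cap applies across the whole job. A sketch of batch_scrape_urls with the new parameter, using placeholder URLs and key:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

status = app.batch_scrape_urls(
    ["https://example.com/a", "https://example.com/b"],  # placeholder URLs
    formats=["markdown"],
    max_concurrency=2,  # sent as maxConcurrency; limits parallel scrapes in this batch
)
print(status.status)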
@@ -1330,6 +1372,7 @@ class FirecrawlApp:
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> BatchScrapeResponse:
@@ -1355,6 +1398,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1414,7 +1458,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
@@ -1462,6 +1508,7 @@ class FirecrawlApp:
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
         agent: Optional[AgentOptions] = None,
+        max_concurrency: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> 'CrawlWatcher':
@@ -1487,6 +1534,7 @@ class FirecrawlApp:
             json_options (Optional[JsonConfig]): JSON extraction config
             actions (Optional[List[Union]]): Actions to perform
             agent (Optional[AgentOptions]): Agent configuration
+            max_concurrency (Optional[int]): Maximum number of concurrent scrapes
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
 
@@ -1542,7 +1590,9 @@ class FirecrawlApp:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
             scrape_params['agent'] = agent.dict(exclude_none=True)
-
+        if max_concurrency is not None:
+            scrape_params['maxConcurrency'] = max_concurrency
+
         # Add any additional kwargs
         scrape_params.update(kwargs)
 
@@ -2771,7 +2821,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             * limit - Maximum pages to crawl
 
             Link Following:
-            * allowBackwardLinks - Follow parent directory links
+            * allowBackwardLinks - DEPRECATED: Use crawlEntireDomain instead
+            * crawlEntireDomain - Follow parent directory links
             * allowExternalLinks - Follow external domain links
             * ignoreSitemap - Skip sitemap.xml processing
 
@@ -3250,6 +3301,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -3272,7 +3324,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3310,7 +3363,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
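The async client gets the same crawl_entire_domain / allow_backward_links precedence. A sketch of the async call, with placeholder URL and key:

import asyncio

from firecrawl import AsyncFirecrawlApp

async def main():
    app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    status = await app.crawl_url(
        "https://example.com/docs/page",
        crawl_entire_domain=True,  # preferred over the deprecated allow_backward_links
        limit=10,
    )
    print(status.status)

asyncio.run(main())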
@@ -3362,6 +3417,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         max_discovery_depth: Optional[int] = None,
         limit: Optional[int] = None,
         allow_backward_links: Optional[bool] = None,
+        crawl_entire_domain: Optional[bool] = None,
         allow_external_links: Optional[bool] = None,
         ignore_sitemap: Optional[bool] = None,
         scrape_options: Optional[ScrapeOptions] = None,
@@ -3384,7 +3440,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             max_depth (Optional[int]): Maximum crawl depth
             max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
             limit (Optional[int]): Maximum pages to crawl
-            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+            crawl_entire_domain (Optional[bool]): Follow parent directory links
             allow_external_links (Optional[bool]): Follow external domain links
             ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3418,7 +3475,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['maxDiscoveryDepth'] = max_discovery_depth
         if limit is not None:
             crawl_params['limit'] = limit
-        if allow_backward_links is not None:
+        if crawl_entire_domain is not None:
+            crawl_params['crawlEntireDomain'] = crawl_entire_domain
+        elif allow_backward_links is not None:
             crawl_params['allowBackwardLinks'] = allow_backward_links
         if allow_external_links is not None:
             crawl_params['allowExternalLinks'] = allow_external_links
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl
-Version: 2.7.1
+Version: 2.9.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
@@ -1,4 +1,2 @@
-build
-dist
 firecrawl
 tests
5 files without changes