firecrawl 2.7.1__tar.gz → 2.10.0__tar.gz

This diff shows the contents of the publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of firecrawl might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.7.1
+ Version: 2.10.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os

  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa

- __version__ = "2.7.1"
+ __version__ = "2.10.0"

  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -437,4 +437,29 @@ def test_search_with_invalid_params():
          app.search("test query", {"invalid_param": "value"})
      assert "ValidationError" in str(e.value)

+ # def test_scrape_url_with_parse_pdf_true():
+ #     if TEST_API_KEY:
+ #         app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ #         response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf', parse_pdf=True)
+ #         assert response is not None
+ #         assert 'markdown' in response
+ #         assert len(response['markdown']) > 100
+
+ # def test_scrape_url_with_parse_pdf_false():
+ #     if TEST_API_KEY:
+ #         app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ #         response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf', parse_pdf=False)
+ #         assert response is not None
+ #         assert 'markdown' in response
+ #         assert 'h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm' in response['markdown']
+
+ # def test_scrape_options_with_parse_pdf():
+ #     if TEST_API_KEY:
+ #         from firecrawl.firecrawl import ScrapeOptions
+ #         app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ #         scrape_options = ScrapeOptions(parsePDF=False, formats=['markdown'])
+ #         response = app.search("firecrawl", limit=1, scrape_options=scrape_options)
+ #         assert response is not None
+ #         assert 'data' in response
+

@@ -140,6 +140,7 @@ class ChangeTrackingOptions(pydantic.BaseModel):
      modes: Optional[List[Literal["git-diff", "json"]]] = None
      schema: Optional[Any] = None
      prompt: Optional[str] = None
+     tag: Optional[str] = None

  class ScrapeOptions(pydantic.BaseModel):
      """Parameters for scraping operations."""
@@ -157,6 +158,9 @@ class ScrapeOptions(pydantic.BaseModel):
      blockAds: Optional[bool] = None
      proxy: Optional[Literal["basic", "stealth", "auto"]] = None
      changeTrackingOptions: Optional[ChangeTrackingOptions] = None
+     maxAge: Optional[int] = None
+     storeInCache: Optional[bool] = None
+     parsePDF: Optional[bool] = None

  class WaitAction(pydantic.BaseModel):
      """Wait action to perform during scraping."""
@@ -260,6 +264,7 @@ class CrawlParams(pydantic.BaseModel):
      ignoreQueryParameters: Optional[bool] = None
      regexOnFullURL: Optional[bool] = None
      delay: Optional[int] = None  # Delay in seconds between scrapes
+     maxConcurrency: Optional[int] = None

  class CrawlResponse(pydantic.BaseModel):
      """Response from crawling operations."""
@@ -292,6 +297,7 @@ class MapParams(pydantic.BaseModel):
      sitemapOnly: Optional[bool] = None
      limit: Optional[int] = None
      timeout: Optional[int] = None
+     useIndex: Optional[bool] = None

  class MapResponse(pydantic.BaseModel):
      """Response from mapping operations."""
@@ -460,10 +466,13 @@ class FirecrawlApp:
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+         parse_pdf: Optional[bool] = None,
          extract: Optional[JsonConfig] = None,
          json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          change_tracking_options: Optional[ChangeTrackingOptions] = None,
+         max_age: Optional[int] = None,
+         store_in_cache: Optional[bool] = None,
          **kwargs) -> ScrapeResponse[Any]:
        """
        Scrape and extract content from a URL.
@@ -531,6 +540,8 @@ class FirecrawlApp:
            scrape_params['blockAds'] = block_ads
        if proxy:
            scrape_params['proxy'] = proxy
+       if parse_pdf is not None:
+           scrape_params['parsePDF'] = parse_pdf
        if extract is not None:
            extract = self._ensure_schema_dict(extract)
            if isinstance(extract, dict) and "schema" in extract:
@@ -545,6 +556,10 @@ class FirecrawlApp:
            scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
        if change_tracking_options:
            scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
+       if max_age is not None:
+           scrape_params['maxAge'] = max_age
+       if store_in_cache is not None:
+           scrape_params['storeInCache'] = store_in_cache

        scrape_params.update(kwargs)

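The snake_case arguments added to scrape_url above map onto camelCase request fields. A usage sketch with a placeholder API key; the maxAge units are assumed to be milliseconds, as noted earlier:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

result = app.scrape_url(
    "https://arxiv.org/pdf/astro-ph/9301001.pdf",
    parse_pdf=True,       # sent to the API as parsePDF
    max_age=3600000,      # sent as maxAge; allows serving a cached copy
    store_in_cache=True,  # sent as storeInCache
)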
@@ -676,6 +691,7 @@ class FirecrawlApp:
          max_discovery_depth: Optional[int] = None,
          limit: Optional[int] = None,
          allow_backward_links: Optional[bool] = None,
+         crawl_entire_domain: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
          scrape_options: Optional[ScrapeOptions] = None,
@@ -684,6 +700,7 @@ class FirecrawlApp:
          ignore_query_parameters: Optional[bool] = None,
          regex_on_full_url: Optional[bool] = None,
          delay: Optional[int] = None,
+         max_concurrency: Optional[int] = None,
          poll_interval: Optional[int] = 2,
          idempotency_key: Optional[str] = None,
          **kwargs
@@ -698,7 +715,8 @@ class FirecrawlApp:
            max_depth (Optional[int]): Maximum crawl depth
            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
            limit (Optional[int]): Maximum pages to crawl
-           allow_backward_links (Optional[bool]): Follow parent directory links
+           allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+           crawl_entire_domain (Optional[bool]): Follow parent directory links
            allow_external_links (Optional[bool]): Follow external domain links
            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
            scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -707,6 +725,7 @@ class FirecrawlApp:
            ignore_query_parameters (Optional[bool]): Ignore URL parameters
            regex_on_full_url (Optional[bool]): Apply regex to full URLs
            delay (Optional[int]): Delay in seconds between scrapes
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            poll_interval (Optional[int]): Seconds between status checks (default: 2)
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API
@@ -736,7 +755,9 @@ class FirecrawlApp:
            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
        if limit is not None:
            crawl_params['limit'] = limit
-       if allow_backward_links is not None:
+       if crawl_entire_domain is not None:
+           crawl_params['crawlEntireDomain'] = crawl_entire_domain
+       elif allow_backward_links is not None:
            crawl_params['allowBackwardLinks'] = allow_backward_links
        if allow_external_links is not None:
            crawl_params['allowExternalLinks'] = allow_external_links
@@ -754,7 +775,9 @@ class FirecrawlApp:
            crawl_params['regexOnFullURL'] = regex_on_full_url
        if delay is not None:
            crawl_params['delay'] = delay
-
+       if max_concurrency is not None:
+           crawl_params['maxConcurrency'] = max_concurrency
+
        # Add any additional kwargs
        crawl_params.update(kwargs)

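Note the precedence in the body above: when both flags are given, crawl_entire_domain wins and allowBackwardLinks is never sent. A crawl sketch with a placeholder key and an illustrative URL:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

status = app.crawl_url(
    "https://docs.firecrawl.dev",  # illustrative URL
    limit=25,
    crawl_entire_domain=True,  # replaces the deprecated allow_backward_links
    delay=1,                   # seconds between scrapes
    max_concurrency=5,         # sent as maxConcurrency; caps simultaneous scrapes
)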
@@ -787,6 +810,7 @@ class FirecrawlApp:
          max_discovery_depth: Optional[int] = None,
          limit: Optional[int] = None,
          allow_backward_links: Optional[bool] = None,
+         crawl_entire_domain: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
          scrape_options: Optional[ScrapeOptions] = None,
@@ -808,7 +832,8 @@ class FirecrawlApp:
            max_depth (Optional[int]): Maximum crawl depth
            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
            limit (Optional[int]): Maximum pages to crawl
-           allow_backward_links (Optional[bool]): Follow parent directory links
+           allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+           crawl_entire_domain (Optional[bool]): Follow parent directory links
            allow_external_links (Optional[bool]): Follow external domain links
            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
            scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -816,6 +841,8 @@ class FirecrawlApp:
            deduplicate_similar_urls (Optional[bool]): Remove similar URLs
            ignore_query_parameters (Optional[bool]): Ignore URL parameters
            regex_on_full_url (Optional[bool]): Apply regex to full URLs
+           delay (Optional[int]): Delay in seconds between scrapes
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API

@@ -845,7 +872,9 @@ class FirecrawlApp:
            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
        if limit is not None:
            crawl_params['limit'] = limit
-       if allow_backward_links is not None:
+       if crawl_entire_domain is not None:
+           crawl_params['crawlEntireDomain'] = crawl_entire_domain
+       elif allow_backward_links is not None:
            crawl_params['allowBackwardLinks'] = allow_backward_links
        if allow_external_links is not None:
            crawl_params['allowExternalLinks'] = allow_external_links
@@ -863,7 +892,9 @@ class FirecrawlApp:
            crawl_params['regexOnFullURL'] = regex_on_full_url
        if delay is not None:
            crawl_params['delay'] = delay
-
+       if max_concurrency is not None:
+           crawl_params['maxConcurrency'] = max_concurrency
+
        # Add any additional kwargs
        crawl_params.update(kwargs)

@@ -1032,6 +1063,7 @@ class FirecrawlApp:
          max_discovery_depth: Optional[int] = None,
          limit: Optional[int] = None,
          allow_backward_links: Optional[bool] = None,
+         crawl_entire_domain: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
          scrape_options: Optional[ScrapeOptions] = None,
@@ -1039,6 +1071,8 @@ class FirecrawlApp:
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
          regex_on_full_url: Optional[bool] = None,
+         delay: Optional[int] = None,
+         max_concurrency: Optional[int] = None,
          idempotency_key: Optional[str] = None,
          **kwargs
      ) -> 'CrawlWatcher':
@@ -1052,7 +1086,8 @@ class FirecrawlApp:
            max_depth (Optional[int]): Maximum crawl depth
            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
            limit (Optional[int]): Maximum pages to crawl
-           allow_backward_links (Optional[bool]): Follow parent directory links
+           allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+           crawl_entire_domain (Optional[bool]): Follow parent directory links
            allow_external_links (Optional[bool]): Follow external domain links
            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
            scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -1060,6 +1095,8 @@ class FirecrawlApp:
            deduplicate_similar_urls (Optional[bool]): Remove similar URLs
            ignore_query_parameters (Optional[bool]): Ignore URL parameters
            regex_on_full_url (Optional[bool]): Apply regex to full URLs
+           delay (Optional[int]): Delay in seconds between scrapes
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API

@@ -1084,6 +1121,8 @@ class FirecrawlApp:
            deduplicate_similar_urls=deduplicate_similar_urls,
            ignore_query_parameters=ignore_query_parameters,
            regex_on_full_url=regex_on_full_url,
+           delay=delay,
+           max_concurrency=max_concurrency,
            idempotency_key=idempotency_key,
            **kwargs
        )
@@ -1102,6 +1141,7 @@ class FirecrawlApp:
          sitemap_only: Optional[bool] = None,
          limit: Optional[int] = None,
          timeout: Optional[int] = None,
+         use_index: Optional[bool] = None,
          **kwargs) -> MapResponse:
        """
        Map and discover links from a URL.
@@ -1144,7 +1184,9 @@ class FirecrawlApp:
            map_params['limit'] = limit
        if timeout is not None:
            map_params['timeout'] = timeout
-
+       if use_index is not None:
+           map_params['useIndex'] = use_index
+
        # Add any additional kwargs
        map_params.update(kwargs)

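A map_url sketch with the new flag (placeholder key and illustrative URL; presumably useIndex lets the API answer from its pre-built link index rather than a live discovery pass, though this diff does not say so):

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# use_index is forwarded to the API as useIndex
links = app.map_url("https://docs.firecrawl.dev", limit=100, use_index=True)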
@@ -1197,6 +1239,7 @@ class FirecrawlApp:
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          poll_interval: Optional[int] = 2,
+         max_concurrency: Optional[int] = None,
          idempotency_key: Optional[str] = None,
          **kwargs
      ) -> BatchScrapeStatusResponse:
@@ -1222,6 +1265,7 @@ class FirecrawlApp:
            json_options (Optional[JsonConfig]): JSON extraction config
            actions (Optional[List[Union]]): Actions to perform
            agent (Optional[AgentOptions]): Agent configuration
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            poll_interval (Optional[int]): Seconds between status checks (default: 2)
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API
@@ -1281,7 +1325,9 @@ class FirecrawlApp:
            scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
        if agent is not None:
            scrape_params['agent'] = agent.dict(exclude_none=True)
-
+       if max_concurrency is not None:
+           scrape_params['maxConcurrency'] = max_concurrency
+
        # Add any additional kwargs
        scrape_params.update(kwargs)

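A batch-scrape sketch with the new concurrency cap; the same passthrough is added to async_batch_scrape_urls and batch_scrape_urls_and_watch below. The key and URLs are placeholders, and the leading list-of-URLs argument is assumed from the published SDK signature (this hunk only shows the tail of the parameter list):

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

batch = app.batch_scrape_urls(
    ["https://example.com", "https://example.org"],
    max_concurrency=3,  # sent as maxConcurrency; caps simultaneous scrapes
)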
@@ -1330,6 +1376,7 @@ class FirecrawlApp:
          json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
+         max_concurrency: Optional[int] = None,
          idempotency_key: Optional[str] = None,
          **kwargs
      ) -> BatchScrapeResponse:
@@ -1355,6 +1402,7 @@ class FirecrawlApp:
            json_options (Optional[JsonConfig]): JSON extraction config
            actions (Optional[List[Union]]): Actions to perform
            agent (Optional[AgentOptions]): Agent configuration
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API

@@ -1414,7 +1462,9 @@ class FirecrawlApp:
            scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
        if agent is not None:
            scrape_params['agent'] = agent.dict(exclude_none=True)
-
+       if max_concurrency is not None:
+           scrape_params['maxConcurrency'] = max_concurrency
+
        # Add any additional kwargs
        scrape_params.update(kwargs)

@@ -1462,6 +1512,7 @@ class FirecrawlApp:
          json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
+         max_concurrency: Optional[int] = None,
          idempotency_key: Optional[str] = None,
          **kwargs
      ) -> 'CrawlWatcher':
@@ -1487,6 +1538,7 @@ class FirecrawlApp:
            json_options (Optional[JsonConfig]): JSON extraction config
            actions (Optional[List[Union]]): Actions to perform
            agent (Optional[AgentOptions]): Agent configuration
+           max_concurrency (Optional[int]): Maximum number of concurrent scrapes
            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
            **kwargs: Additional parameters to pass to the API

@@ -1542,7 +1594,9 @@ class FirecrawlApp:
            scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
        if agent is not None:
            scrape_params['agent'] = agent.dict(exclude_none=True)
-
+       if max_concurrency is not None:
+           scrape_params['maxConcurrency'] = max_concurrency
+
        # Add any additional kwargs
        scrape_params.update(kwargs)

@@ -2771,7 +2825,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
            * limit - Maximum pages to crawl

        Link Following:
-           * allowBackwardLinks - Follow parent directory links
+           * allowBackwardLinks - DEPRECATED: Use crawlEntireDomain instead
+           * crawlEntireDomain - Follow parent directory links
            * allowExternalLinks - Follow external domain links
            * ignoreSitemap - Skip sitemap.xml processing

@@ -2853,6 +2908,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+         parse_pdf: Optional[bool] = None,
          extract: Optional[JsonConfig] = None,
          json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
@@ -2930,6 +2986,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
            scrape_params['blockAds'] = block_ads
        if proxy:
            scrape_params['proxy'] = proxy
+       if parse_pdf is not None:
+           scrape_params['parsePDF'] = parse_pdf
        if extract is not None:
            extract = self._ensure_schema_dict(extract)
            if isinstance(extract, dict) and "schema" in extract:
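
The async client gains the same parse_pdf passthrough as the sync one. A sketch with a placeholder key:

import asyncio

from firecrawl import AsyncFirecrawlApp

async def main():
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key
    # parse_pdf is sent as parsePDF, exactly as in the sync client
    result = await app.scrape_url(
        "https://arxiv.org/pdf/astro-ph/9301001.pdf",
        parse_pdf=True,
    )
    print(result)

asyncio.run(main())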
@@ -3250,6 +3308,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          max_discovery_depth: Optional[int] = None,
          limit: Optional[int] = None,
          allow_backward_links: Optional[bool] = None,
+         crawl_entire_domain: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
          scrape_options: Optional[ScrapeOptions] = None,
@@ -3272,7 +3331,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
            max_depth (Optional[int]): Maximum crawl depth
            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
            limit (Optional[int]): Maximum pages to crawl
-           allow_backward_links (Optional[bool]): Follow parent directory links
+           allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+           crawl_entire_domain (Optional[bool]): Follow parent directory links
            allow_external_links (Optional[bool]): Follow external domain links
            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
            scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3310,7 +3370,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
        if limit is not None:
            crawl_params['limit'] = limit
-       if allow_backward_links is not None:
+       if crawl_entire_domain is not None:
+           crawl_params['crawlEntireDomain'] = crawl_entire_domain
+       elif allow_backward_links is not None:
            crawl_params['allowBackwardLinks'] = allow_backward_links
        if allow_external_links is not None:
            crawl_params['allowExternalLinks'] = allow_external_links
@@ -3362,6 +3424,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          max_discovery_depth: Optional[int] = None,
          limit: Optional[int] = None,
          allow_backward_links: Optional[bool] = None,
+         crawl_entire_domain: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
          scrape_options: Optional[ScrapeOptions] = None,
@@ -3384,7 +3447,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
            max_depth (Optional[int]): Maximum crawl depth
            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
            limit (Optional[int]): Maximum pages to crawl
-           allow_backward_links (Optional[bool]): Follow parent directory links
+           allow_backward_links (Optional[bool]): DEPRECATED: Use crawl_entire_domain instead
+           crawl_entire_domain (Optional[bool]): Follow parent directory links
            allow_external_links (Optional[bool]): Follow external domain links
            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
            scrape_options (Optional[ScrapeOptions]): Page scraping configuration
@@ -3418,7 +3482,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
        if limit is not None:
            crawl_params['limit'] = limit
-       if allow_backward_links is not None:
+       if crawl_entire_domain is not None:
+           crawl_params['crawlEntireDomain'] = crawl_entire_domain
+       elif allow_backward_links is not None:
            crawl_params['allowBackwardLinks'] = allow_backward_links
        if allow_external_links is not None:
            crawl_params['allowExternalLinks'] = allow_external_links
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.7.1
+ Version: 2.10.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
@@ -1,4 +1,2 @@
- build
- dist
  firecrawl
  tests