firecrawl 2.5.2__tar.gz → 2.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic; consult the package registry's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
15
15
 
16
- __version__ = "2.5.2"
16
+ __version__ = "2.5.4"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -161,7 +161,7 @@ class ScrapeOptions(pydantic.BaseModel):
161
161
  class WaitAction(pydantic.BaseModel):
162
162
  """Wait action to perform during scraping."""
163
163
  type: Literal["wait"]
164
- milliseconds: int
164
+ milliseconds: Optional[int] = None
165
165
  selector: Optional[str] = None
166
166
 
167
167
  class ScreenshotAction(pydantic.BaseModel):
@@ -259,6 +259,7 @@ class CrawlParams(pydantic.BaseModel):
259
259
  deduplicateSimilarURLs: Optional[bool] = None
260
260
  ignoreQueryParameters: Optional[bool] = None
261
261
  regexOnFullURL: Optional[bool] = None
262
+ delay: Optional[int] = None # Delay in seconds between scrapes
262
263
 
263
264
  class CrawlResponse(pydantic.BaseModel):
264
265
  """Response from crawling operations."""
@@ -540,9 +541,9 @@ class FirecrawlApp:
540
541
  json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
541
542
  scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
542
543
  if actions:
543
- scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
544
+ scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
544
545
  if change_tracking_options:
545
- scrape_params['changeTrackingOptions'] = change_tracking_options.dict(exclude_none=True)
546
+ scrape_params['changeTrackingOptions'] = change_tracking_options if isinstance(change_tracking_options, dict) else change_tracking_options.dict(exclude_none=True)
546
547
 
547
548
  scrape_params.update(kwargs)
548
549
 
@@ -681,6 +682,7 @@ class FirecrawlApp:
681
682
  deduplicate_similar_urls: Optional[bool] = None,
682
683
  ignore_query_parameters: Optional[bool] = None,
683
684
  regex_on_full_url: Optional[bool] = None,
685
+ delay: Optional[int] = None,
684
686
  poll_interval: Optional[int] = 2,
685
687
  idempotency_key: Optional[str] = None,
686
688
  **kwargs
@@ -703,6 +705,7 @@ class FirecrawlApp:
703
705
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
704
706
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
705
707
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
708
+ delay (Optional[int]): Delay in seconds between scrapes
706
709
  poll_interval (Optional[int]): Seconds between status checks (default: 2)
707
710
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
708
711
  **kwargs: Additional parameters to pass to the API
@@ -748,6 +751,8 @@ class FirecrawlApp:
748
751
  crawl_params['ignoreQueryParameters'] = ignore_query_parameters
749
752
  if regex_on_full_url is not None:
750
753
  crawl_params['regexOnFullURL'] = regex_on_full_url
754
+ if delay is not None:
755
+ crawl_params['delay'] = delay
751
756
 
752
757
  # Add any additional kwargs
753
758
  crawl_params.update(kwargs)
@@ -788,6 +793,7 @@ class FirecrawlApp:
788
793
  deduplicate_similar_urls: Optional[bool] = None,
789
794
  ignore_query_parameters: Optional[bool] = None,
790
795
  regex_on_full_url: Optional[bool] = None,
796
+ delay: Optional[int] = None,
791
797
  idempotency_key: Optional[str] = None,
792
798
  **kwargs
793
799
  ) -> CrawlResponse:
@@ -854,6 +860,8 @@ class FirecrawlApp:
854
860
  crawl_params['ignoreQueryParameters'] = ignore_query_parameters
855
861
  if regex_on_full_url is not None:
856
862
  crawl_params['regexOnFullURL'] = regex_on_full_url
863
+ if delay is not None:
864
+ crawl_params['delay'] = delay
857
865
 
858
866
  # Add any additional kwargs
859
867
  crawl_params.update(kwargs)
@@ -2924,7 +2932,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
2924
2932
  json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
2925
2933
  scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
2926
2934
  if actions:
2927
- scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
2935
+ scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
2928
2936
 
2929
2937
  if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
2930
2938
  scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
@@ -3240,6 +3248,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3240
3248
  deduplicate_similar_urls: Optional[bool] = None,
3241
3249
  ignore_query_parameters: Optional[bool] = None,
3242
3250
  regex_on_full_url: Optional[bool] = None,
3251
+ delay: Optional[int] = None,
3243
3252
  poll_interval: Optional[int] = 2,
3244
3253
  idempotency_key: Optional[str] = None,
3245
3254
  **kwargs
@@ -3262,6 +3271,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3262
3271
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
3263
3272
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
3264
3273
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
3274
+ delay (Optional[int]): Delay in seconds between scrapes
3265
3275
  poll_interval (Optional[int]): Seconds between status checks (default: 2)
3266
3276
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
3267
3277
  **kwargs: Additional parameters to pass to the API
@@ -3307,6 +3317,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
3307
3317
  crawl_params['ignoreQueryParameters'] = ignore_query_parameters
3308
3318
  if regex_on_full_url is not None:
3309
3319
  crawl_params['regexOnFullURL'] = regex_on_full_url
3320
+ if delay is not None:
3321
+ crawl_params['delay'] = delay
3310
3322
 
3311
3323
  # Add any additional kwargs
3312
3324
  crawl_params.update(kwargs)
@@ -3348,6 +3360,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3348
3360
  deduplicate_similar_urls: Optional[bool] = None,
3349
3361
  ignore_query_parameters: Optional[bool] = None,
3350
3362
  regex_on_full_url: Optional[bool] = None,
3363
+ delay: Optional[int] = None,
3351
3364
  poll_interval: Optional[int] = 2,
3352
3365
  idempotency_key: Optional[str] = None,
3353
3366
  **kwargs
@@ -3412,6 +3425,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
3412
3425
  crawl_params['ignoreQueryParameters'] = ignore_query_parameters
3413
3426
  if regex_on_full_url is not None:
3414
3427
  crawl_params['regexOnFullURL'] = regex_on_full_url
3428
+ if delay is not None:
3429
+ crawl_params['delay'] = delay
3415
3430
 
3416
3431
  # Add any additional kwargs
3417
3432
  crawl_params.update(kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
File without changes
File without changes
File without changes
File without changes
File without changes