firecrawl 2.5.3__tar.gz → 2.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic.
- {firecrawl-2.5.3 → firecrawl-2.5.4}/PKG-INFO +1 -1
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/__init__.py +1 -1
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/firecrawl.py +16 -1
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.5.3 → firecrawl-2.5.4}/LICENSE +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/README.md +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/pyproject.toml +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/setup.cfg +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/setup.py +0 -0
- {firecrawl-2.5.3 → firecrawl-2.5.4}/tests/test_change_tracking.py +0 -0
firecrawl/__init__.py

@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.5.3"
+__version__ = "2.5.4"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py

@@ -161,7 +161,7 @@ class ScrapeOptions(pydantic.BaseModel):
 class WaitAction(pydantic.BaseModel):
     """Wait action to perform during scraping."""
     type: Literal["wait"]
-    milliseconds: int
+    milliseconds: Optional[int] = None
     selector: Optional[str] = None
 
 class ScreenshotAction(pydantic.BaseModel):
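The WaitAction change above relaxes a required field: milliseconds is now optional, so a wait step can be expressed by a CSS selector alone. A minimal sketch, re-declaring the model locally exactly as it appears in 2.5.4; only the construction calls at the bottom are illustrative additions:

from typing import Literal, Optional
import pydantic

class WaitAction(pydantic.BaseModel):
    """Wait action to perform during scraping."""
    type: Literal["wait"]
    milliseconds: Optional[int] = None   # required int in 2.5.3, optional in 2.5.4
    selector: Optional[str] = None

# Under 2.5.3 this raised a validation error because milliseconds was required;
# under 2.5.4 a selector-only wait validates.
wait_for_element = WaitAction(type="wait", selector="#content")
wait_fixed = WaitAction(type="wait", milliseconds=2000)
print(wait_for_element, wait_fixed)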
@@ -259,6 +259,7 @@ class CrawlParams(pydantic.BaseModel):
     deduplicateSimilarURLs: Optional[bool] = None
     ignoreQueryParameters: Optional[bool] = None
     regexOnFullURL: Optional[bool] = None
+    delay: Optional[int] = None  # Delay in seconds between scrapes
 
 class CrawlResponse(pydantic.BaseModel):
     """Response from crawling operations."""
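The CrawlParams hunk adds a delay field alongside the existing crawl options. A minimal sketch of just the fields touched above, re-declared locally; the model_dump call assumes pydantic v2:

from typing import Optional
import pydantic

class CrawlParams(pydantic.BaseModel):
    deduplicateSimilarURLs: Optional[bool] = None
    ignoreQueryParameters: Optional[bool] = None
    regexOnFullURL: Optional[bool] = None
    delay: Optional[int] = None  # Delay in seconds between scrapes

params = CrawlParams(delay=3, regexOnFullURL=True)
print(params.model_dump(exclude_none=True))  # {'regexOnFullURL': True, 'delay': 3}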
@@ -681,6 +682,7 @@ class FirecrawlApp:
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -703,6 +705,7 @@ class FirecrawlApp:
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -748,6 +751,8 @@ class FirecrawlApp:
             crawl_params['ignoreQueryParameters'] = ignore_query_parameters
         if regex_on_full_url is not None:
             crawl_params['regexOnFullURL'] = regex_on_full_url
+        if delay is not None:
+            crawl_params['delay'] = delay
 
         # Add any additional kwargs
         crawl_params.update(kwargs)
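Taken together, the FirecrawlApp hunks above let callers throttle a crawl from the blocking crawl call. A hedged usage sketch: the method name (crawl_url), the positional url argument, and the API key are assumptions not shown in this diff; delay, ignore_query_parameters, and poll_interval come from the signature above:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")   # hypothetical key
result = app.crawl_url(
    "https://example.com",         # assumed positional url argument
    delay=2,                       # new in 2.5.4: seconds to wait between scrapes
    ignore_query_parameters=True,  # existing option from the same signature
    poll_interval=5,               # seconds between status checks
)
print(result)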
@@ -788,6 +793,7 @@ class FirecrawlApp:
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
         idempotency_key: Optional[str] = None,
         **kwargs
     ) -> CrawlResponse:
@@ -854,6 +860,8 @@ class FirecrawlApp:
             crawl_params['ignoreQueryParameters'] = ignore_query_parameters
         if regex_on_full_url is not None:
             crawl_params['regexOnFullURL'] = regex_on_full_url
+        if delay is not None:
+            crawl_params['delay'] = delay
 
         # Add any additional kwargs
         crawl_params.update(kwargs)
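Both crawl_params hunks follow the same mapping pattern: snake_case keyword arguments are copied into a camelCase request dict only when they are not None, and delay is passed through under the same name. A standalone sketch of that pattern (build_crawl_params is a hypothetical helper name, not part of the SDK):

from typing import Any, Dict, Optional

def build_crawl_params(
    ignore_query_parameters: Optional[bool] = None,
    regex_on_full_url: Optional[bool] = None,
    delay: Optional[int] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    crawl_params: Dict[str, Any] = {}
    if ignore_query_parameters is not None:
        crawl_params['ignoreQueryParameters'] = ignore_query_parameters
    if regex_on_full_url is not None:
        crawl_params['regexOnFullURL'] = regex_on_full_url
    if delay is not None:
        crawl_params['delay'] = delay   # forwarded unchanged, in seconds
    crawl_params.update(kwargs)         # any additional parameters
    return crawl_params

print(build_crawl_params(regex_on_full_url=True, delay=3))
# {'regexOnFullURL': True, 'delay': 3}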
@@ -3240,6 +3248,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -3262,6 +3271,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             deduplicate_similar_urls (Optional[bool]): Remove similar URLs
             ignore_query_parameters (Optional[bool]): Ignore URL parameters
             regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            delay (Optional[int]): Delay in seconds between scrapes
             poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
             **kwargs: Additional parameters to pass to the API
@@ -3307,6 +3317,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['ignoreQueryParameters'] = ignore_query_parameters
         if regex_on_full_url is not None:
             crawl_params['regexOnFullURL'] = regex_on_full_url
+        if delay is not None:
+            crawl_params['delay'] = delay
 
         # Add any additional kwargs
         crawl_params.update(kwargs)
@@ -3348,6 +3360,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         deduplicate_similar_urls: Optional[bool] = None,
         ignore_query_parameters: Optional[bool] = None,
         regex_on_full_url: Optional[bool] = None,
+        delay: Optional[int] = None,
         poll_interval: Optional[int] = 2,
         idempotency_key: Optional[str] = None,
         **kwargs
@@ -3412,6 +3425,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
             crawl_params['ignoreQueryParameters'] = ignore_query_parameters
         if regex_on_full_url is not None:
             crawl_params['regexOnFullURL'] = regex_on_full_url
+        if delay is not None:
+            crawl_params['delay'] = delay
 
         # Add any additional kwargs
         crawl_params.update(kwargs)
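AsyncFirecrawlApp receives the identical signature and mapping changes, so the new parameter is available from async code as well. A hedged sketch; the awaitable method name (crawl_url), the url argument, and the API key are assumptions not shown in this diff:

import asyncio
from firecrawl import AsyncFirecrawlApp

async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # hypothetical key
    result = await app.crawl_url("https://example.com", delay=2)  # ~2 s between scrapes
    print(result)

asyncio.run(main())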