firecrawl 2.15.0__tar.gz → 2.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-2.15.0 → firecrawl-2.16.0}/PKG-INFO +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/__init__.py +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/firecrawl.py +12 -4
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.0}/LICENSE +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/README.md +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/pyproject.toml +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/setup.cfg +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/setup.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.0}/tests/test_change_tracking.py +0 -0
|
@@ -13,7 +13,7 @@ import os
|
|
|
13
13
|
|
|
14
14
|
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.15.0"
|
|
16
|
+
__version__ = "2.16.0"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
|
@@ -464,6 +464,7 @@ class FirecrawlApp:
|
|
|
464
464
|
url: str,
|
|
465
465
|
*,
|
|
466
466
|
formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
|
|
467
|
+
headers: Optional[Dict[str, str]] = None,
|
|
467
468
|
include_tags: Optional[List[str]] = None,
|
|
468
469
|
exclude_tags: Optional[List[str]] = None,
|
|
469
470
|
only_main_content: Optional[bool] = None,
|
|
@@ -490,6 +491,7 @@ class FirecrawlApp:
|
|
|
490
491
|
Args:
|
|
491
492
|
url (str): Target URL to scrape
|
|
492
493
|
formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc)
|
|
494
|
+
headers (Optional[Dict[str, str]]): Custom HTTP headers
|
|
493
495
|
include_tags (Optional[List[str]]): HTML tags to include
|
|
494
496
|
exclude_tags (Optional[List[str]]): HTML tags to exclude
|
|
495
497
|
only_main_content (Optional[bool]): Extract main content only
|
|
@@ -518,7 +520,7 @@ class FirecrawlApp:
|
|
|
518
520
|
Raises:
|
|
519
521
|
Exception: If scraping fails
|
|
520
522
|
"""
|
|
521
|
-
|
|
523
|
+
_headers = self._prepare_headers()
|
|
522
524
|
|
|
523
525
|
# Build scrape parameters
|
|
524
526
|
scrape_params = {
|
|
@@ -529,6 +531,8 @@ class FirecrawlApp:
|
|
|
529
531
|
# Add optional parameters if provided
|
|
530
532
|
if formats:
|
|
531
533
|
scrape_params['formats'] = formats
|
|
534
|
+
if headers:
|
|
535
|
+
scrape_params['headers'] = headers
|
|
532
536
|
if include_tags:
|
|
533
537
|
scrape_params['includeTags'] = include_tags
|
|
534
538
|
if exclude_tags:
|
|
@@ -584,7 +588,7 @@ class FirecrawlApp:
|
|
|
584
588
|
# Make request
|
|
585
589
|
response = requests.post(
|
|
586
590
|
f'{self.api_url}/v1/scrape',
|
|
587
|
-
headers=headers,
|
|
591
|
+
headers=_headers,
|
|
588
592
|
json=scrape_params,
|
|
589
593
|
timeout=(timeout + 5000 if timeout else None)
|
|
590
594
|
)
|
|
@@ -2963,6 +2967,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2963
2967
|
url: str,
|
|
2964
2968
|
*,
|
|
2965
2969
|
formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
|
|
2970
|
+
headers: Optional[Dict[str, str]] = None,
|
|
2966
2971
|
include_tags: Optional[List[str]] = None,
|
|
2967
2972
|
exclude_tags: Optional[List[str]] = None,
|
|
2968
2973
|
only_main_content: Optional[bool] = None,
|
|
@@ -2985,6 +2990,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2985
2990
|
Args:
|
|
2986
2991
|
url (str): Target URL to scrape
|
|
2987
2992
|
formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc)
|
|
2993
|
+
headers (Optional[Dict[str, str]]): Custom HTTP headers
|
|
2988
2994
|
include_tags (Optional[List[str]]): HTML tags to include
|
|
2989
2995
|
exclude_tags (Optional[List[str]]): HTML tags to exclude
|
|
2990
2996
|
only_main_content (Optional[bool]): Extract main content only
|
|
@@ -3019,7 +3025,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3019
3025
|
# Validate any additional kwargs
|
|
3020
3026
|
self._validate_kwargs(kwargs, "scrape_url")
|
|
3021
3027
|
|
|
3022
|
-
|
|
3028
|
+
_headers = self._prepare_headers()
|
|
3023
3029
|
|
|
3024
3030
|
# Build scrape parameters
|
|
3025
3031
|
scrape_params = {
|
|
@@ -3030,6 +3036,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3030
3036
|
# Add optional parameters if provided and not None
|
|
3031
3037
|
if formats:
|
|
3032
3038
|
scrape_params['formats'] = formats
|
|
3039
|
+
if headers:
|
|
3040
|
+
scrape_params['headers'] = headers
|
|
3033
3041
|
if include_tags:
|
|
3034
3042
|
scrape_params['includeTags'] = include_tags
|
|
3035
3043
|
if exclude_tags:
|
|
@@ -3077,7 +3085,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3077
3085
|
response = await self._async_post_request(
|
|
3078
3086
|
f'{self.api_url}{endpoint}',
|
|
3079
3087
|
scrape_params,
|
|
3080
|
-
|
|
3088
|
+
_headers
|
|
3081
3089
|
)
|
|
3082
3090
|
|
|
3083
3091
|
if response.get('success') and 'data' in response:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|