firecrawl 2.15.0__tar.gz → 2.16.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-2.15.0 → firecrawl-2.16.1}/PKG-INFO +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/__init__.py +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/firecrawl.py +15 -7
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.15.0 → firecrawl-2.16.1}/LICENSE +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/README.md +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/pyproject.toml +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/setup.cfg +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/setup.py +0 -0
- {firecrawl-2.15.0 → firecrawl-2.16.1}/tests/test_change_tracking.py +0 -0
|
@@ -13,7 +13,7 @@ import os
|
|
|
13
13
|
|
|
14
14
|
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.15.0"
|
|
16
|
+
__version__ = "2.16.1"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
|
@@ -464,6 +464,7 @@ class FirecrawlApp:
|
|
|
464
464
|
url: str,
|
|
465
465
|
*,
|
|
466
466
|
formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
|
|
467
|
+
headers: Optional[Dict[str, str]] = None,
|
|
467
468
|
include_tags: Optional[List[str]] = None,
|
|
468
469
|
exclude_tags: Optional[List[str]] = None,
|
|
469
470
|
only_main_content: Optional[bool] = None,
|
|
@@ -490,6 +491,7 @@ class FirecrawlApp:
|
|
|
490
491
|
Args:
|
|
491
492
|
url (str): Target URL to scrape
|
|
492
493
|
formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc)
|
|
494
|
+
headers (Optional[Dict[str, str]]): Custom HTTP headers
|
|
493
495
|
include_tags (Optional[List[str]]): HTML tags to include
|
|
494
496
|
exclude_tags (Optional[List[str]]): HTML tags to exclude
|
|
495
497
|
only_main_content (Optional[bool]): Extract main content only
|
|
@@ -518,7 +520,7 @@ class FirecrawlApp:
|
|
|
518
520
|
Raises:
|
|
519
521
|
Exception: If scraping fails
|
|
520
522
|
"""
|
|
521
|
-
|
|
523
|
+
_headers = self._prepare_headers()
|
|
522
524
|
|
|
523
525
|
# Build scrape parameters
|
|
524
526
|
scrape_params = {
|
|
@@ -529,6 +531,8 @@ class FirecrawlApp:
|
|
|
529
531
|
# Add optional parameters if provided
|
|
530
532
|
if formats:
|
|
531
533
|
scrape_params['formats'] = formats
|
|
534
|
+
if headers:
|
|
535
|
+
scrape_params['headers'] = headers
|
|
532
536
|
if include_tags:
|
|
533
537
|
scrape_params['includeTags'] = include_tags
|
|
534
538
|
if exclude_tags:
|
|
@@ -584,7 +588,7 @@ class FirecrawlApp:
|
|
|
584
588
|
# Make request
|
|
585
589
|
response = requests.post(
|
|
586
590
|
f'{self.api_url}/v1/scrape',
|
|
587
|
-
headers=headers,
|
|
591
|
+
headers=_headers,
|
|
588
592
|
json=scrape_params,
|
|
589
593
|
timeout=(timeout + 5000 if timeout else None)
|
|
590
594
|
)
|
|
@@ -2963,6 +2967,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2963
2967
|
url: str,
|
|
2964
2968
|
*,
|
|
2965
2969
|
formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
|
|
2970
|
+
headers: Optional[Dict[str, str]] = None,
|
|
2966
2971
|
include_tags: Optional[List[str]] = None,
|
|
2967
2972
|
exclude_tags: Optional[List[str]] = None,
|
|
2968
2973
|
only_main_content: Optional[bool] = None,
|
|
@@ -2985,6 +2990,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2985
2990
|
Args:
|
|
2986
2991
|
url (str): Target URL to scrape
|
|
2987
2992
|
formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc)
|
|
2993
|
+
headers (Optional[Dict[str, str]]): Custom HTTP headers
|
|
2988
2994
|
include_tags (Optional[List[str]]): HTML tags to include
|
|
2989
2995
|
exclude_tags (Optional[List[str]]): HTML tags to exclude
|
|
2990
2996
|
only_main_content (Optional[bool]): Extract main content only
|
|
@@ -3019,7 +3025,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3019
3025
|
# Validate any additional kwargs
|
|
3020
3026
|
self._validate_kwargs(kwargs, "scrape_url")
|
|
3021
3027
|
|
|
3022
|
-
|
|
3028
|
+
_headers = self._prepare_headers()
|
|
3023
3029
|
|
|
3024
3030
|
# Build scrape parameters
|
|
3025
3031
|
scrape_params = {
|
|
@@ -3030,6 +3036,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3030
3036
|
# Add optional parameters if provided and not None
|
|
3031
3037
|
if formats:
|
|
3032
3038
|
scrape_params['formats'] = formats
|
|
3039
|
+
if headers:
|
|
3040
|
+
scrape_params['headers'] = headers
|
|
3033
3041
|
if include_tags:
|
|
3034
3042
|
scrape_params['includeTags'] = include_tags
|
|
3035
3043
|
if exclude_tags:
|
|
@@ -3077,7 +3085,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3077
3085
|
response = await self._async_post_request(
|
|
3078
3086
|
f'{self.api_url}{endpoint}',
|
|
3079
3087
|
scrape_params,
|
|
3080
|
-
|
|
3088
|
+
_headers
|
|
3081
3089
|
)
|
|
3082
3090
|
|
|
3083
3091
|
if response.get('success') and 'data' in response:
|
|
@@ -3366,7 +3374,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3366
3374
|
except:
|
|
3367
3375
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
3368
3376
|
else:
|
|
3369
|
-
self._handle_error(response, 'start batch scrape job')
|
|
3377
|
+
await self._handle_error(response, 'start batch scrape job')
|
|
3370
3378
|
|
|
3371
3379
|
async def crawl_url(
|
|
3372
3380
|
self,
|
|
@@ -3485,7 +3493,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3485
3493
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
3486
3494
|
return await self._async_monitor_job_status(id, headers, poll_interval)
|
|
3487
3495
|
else:
|
|
3488
|
-
self._handle_error(response, 'start crawl job')
|
|
3496
|
+
await self._handle_error(response, 'start crawl job')
|
|
3489
3497
|
|
|
3490
3498
|
|
|
3491
3499
|
async def async_crawl_url(
|
|
@@ -3603,7 +3611,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3603
3611
|
except:
|
|
3604
3612
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
3605
3613
|
else:
|
|
3606
|
-
self._handle_error(response, 'start crawl job')
|
|
3614
|
+
await self._handle_error(response, 'start crawl job')
|
|
3607
3615
|
|
|
3608
3616
|
async def check_crawl_status(self, id: str) -> CrawlStatusResponse:
|
|
3609
3617
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|