firecrawl 2.1.2.tar.gz → 2.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl might be problematic.
- {firecrawl-2.1.2 → firecrawl-2.2.0}/PKG-INFO +1 -1
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/__init__.py +1 -1
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/firecrawl.py +105 -20
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl.egg-info/top_level.txt +0 -2
- {firecrawl-2.1.2 → firecrawl-2.2.0}/LICENSE +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/README.md +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/pyproject.toml +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/setup.cfg +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/setup.py +0 -0
- {firecrawl-2.1.2 → firecrawl-2.2.0}/tests/test_change_tracking.py +0 -0
```diff
--- firecrawl-2.1.2/firecrawl/firecrawl.py
+++ firecrawl-2.2.0/firecrawl/firecrawl.py
@@ -570,7 +570,6 @@ class FirecrawlApp:
         location: Optional[str] = None,
         timeout: Optional[int] = None,
         scrape_options: Optional[ScrapeOptions] = None,
-        params: Optional[Union[Dict[str, Any], SearchParams]] = None,
         **kwargs) -> SearchResponse:
         """
         Search for content using Firecrawl.
@@ -585,7 +584,6 @@ class FirecrawlApp:
             location (Optional[str]): Geo-targeting
             timeout (Optional[int]): Request timeout in milliseconds
             scrape_options (Optional[ScrapeOptions]): Result scraping configuration
-            params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
             **kwargs: Additional keyword arguments for future compatibility

         Returns:
@@ -598,13 +596,11 @@ class FirecrawlApp:
         Raises:
             Exception: If search fails or response cannot be parsed
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "search")
+
         # Build search parameters
         search_params = {}
-        if params:
-            if isinstance(params, dict):
-                search_params.update(params)
-            else:
-                search_params.update(params.dict(exclude_none=True))

         # Add individual parameters
         if limit is not None:
```
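In practice this removes the `params` argument from `search()`: options that used to ride in a `SearchParams` object or dict are now flat keyword arguments, checked up front by `_validate_kwargs`. A minimal before/after sketch (the API key, query, and option values are placeholders):

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# 2.1.2 style (no longer accepted): options bundled into `params`
# results = app.search("firecrawl", params={"limit": 5, "lang": "en"})

# 2.2.0 style: flat keyword arguments, validated against the "search" allow-set
results = app.search(
    "firecrawl",
    limit=5,
    lang="en",
    country="us",
    timeout=15000,  # milliseconds, per the docstring above
)
```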
```diff
@@ -705,6 +701,9 @@ class FirecrawlApp:
         Raises:
             Exception: If crawl fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "crawl_url")
+
         crawl_params = {}

         # Add individual parameters
@@ -808,6 +807,9 @@ class FirecrawlApp:
         Raises:
             Exception: If crawl initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_crawl_url")
+
         crawl_params = {}

         # Add individual parameters
```
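With `crawl_url` and `async_crawl_url` now calling `_validate_kwargs` first, a misspelled option raises a `ValueError` before any request is sent, instead of being silently dropped. A sketch of the failure mode (placeholder key and URL; `max_dept` is a deliberate typo for `max_depth`):

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

try:
    app.crawl_url("https://example.com", max_dept=2)  # typo caught locally
except ValueError as err:
    print(err)
    # Unsupported parameter(s) for crawl_url: max_dept. Please refer to the
    # API documentation for the correct parameters.
```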
```diff
@@ -1076,7 +1078,7 @@ class FirecrawlApp:
         sitemap_only: Optional[bool] = None,
         limit: Optional[int] = None,
         timeout: Optional[int] = None,
-        params: Optional[MapParams] = None) -> MapResponse:
+        **kwargs) -> MapResponse:
         """
         Map and discover links from a URL.

@@ -1088,7 +1090,7 @@ class FirecrawlApp:
             sitemap_only (Optional[bool]): Only use sitemap.xml
             limit (Optional[int]): Maximum URLs to return
             timeout (Optional[int]): Request timeout in milliseconds
-            params (Optional[MapParams]): Additional map parameters
+            **kwargs: Additional parameters to pass to the API

         Returns:
             MapResponse: Response containing:
@@ -1099,10 +1101,11 @@ class FirecrawlApp:
         Raises:
             Exception: If mapping fails or response cannot be parsed
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "map_url")
+
         # Build map parameters
         map_params = {}
-        if params:
-            map_params.update(params.dict(exclude_none=True))

         # Add individual parameters
         if search is not None:
@@ -1118,6 +1121,9 @@ class FirecrawlApp:
         if timeout is not None:
             map_params['timeout'] = timeout

+        # Add any additional kwargs
+        map_params.update(kwargs)
+
         # Create final params object
         final_params = MapParams(**map_params)
         params_dict = final_params.dict(exclude_none=True)
```
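`map_url` follows the same pattern, and additionally merges any validated kwargs into the dict that builds the `MapParams` model, so extras reach the API through the same Pydantic validation as the named arguments. A usage sketch (placeholder key and URL):

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# Named arguments and validated kwargs all land in the same MapParams model
result = app.map_url(
    "https://example.com",
    limit=100,
    include_subdomains=True,  # in the map_url allow-set
    sitemap_only=False,
)
print(result)
```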
```diff
@@ -1205,6 +1211,9 @@ class FirecrawlApp:
         Raises:
             Exception: If batch scrape fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -1328,6 +1337,9 @@ class FirecrawlApp:
         Raises:
             Exception: If job initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -1446,6 +1458,9 @@ class FirecrawlApp:
         Raises:
             Exception: If batch scrape job fails to start
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls_and_watch")
+
         scrape_params = {}

         # Add individual parameters
```
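All three batch entry points share one allow-set (including the `agent` key), so a given set of scrape options is portable between the blocking, async-job, and watcher variants. A sketch with placeholder key and URLs:

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# The same options would also be valid for async_batch_scrape_urls
# and batch_scrape_urls_and_watch
job = app.batch_scrape_urls(
    ["https://example.com", "https://example.org"],
    formats=["markdown", "html"],
    only_main_content=True,
    timeout=30000,
)
```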
```diff
@@ -2394,6 +2409,56 @@ class FirecrawlApp:

         return {'success': False, 'error': 'Internal server error'}

+    def _validate_kwargs(self, kwargs: Dict[str, Any], method_name: str) -> None:
+        """
+        Validate additional keyword arguments before they are passed to the API.
+        This provides early validation before the Pydantic model validation.
+
+        Args:
+            kwargs (Dict[str, Any]): Additional keyword arguments to validate
+            method_name (str): Name of the method these kwargs are for
+
+        Raises:
+            ValueError: If kwargs contain invalid or unsupported parameters
+        """
+        if not kwargs:
+            return
+
+        # Known parameter mappings for each method
+        method_params = {
+            "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
+                           "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
+                           "block_ads", "proxy", "extract", "json_options", "actions"},
+            "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
+            "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
+                          "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
+                          "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url"},
+            "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout"},
+            "batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                  "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                  "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                  "actions", "agent"},
+            "async_batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                        "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                        "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                        "actions", "agent"},
+            "batch_scrape_urls_and_watch": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                            "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                            "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                            "actions", "agent"}
+        }
+
+        # Get allowed parameters for this method
+        allowed_params = method_params.get(method_name, set())
+
+        # Check for unknown parameters
+        unknown_params = set(kwargs.keys()) - allowed_params
+        if unknown_params:
+            raise ValueError(f"Unsupported parameter(s) for {method_name}: {', '.join(unknown_params)}. Please refer to the API documentation for the correct parameters.")
+
+        # Additional type validation can be added here if needed
+        # For now, we rely on Pydantic models for detailed type validation
+
 class CrawlWatcher:
     """
     A class to watch and handle crawl job events via WebSocket connection.
```
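The core of `_validate_kwargs` is a set difference against a per-method allow-set; note that `method_params.get(method_name, set())` means a method missing from the table rejects every kwarg. A standalone restatement of that logic, for illustration only (the names here are hypothetical, not the SDK's):

```python
from typing import Any, Dict, Set

# Hypothetical mirror of the allow-set lookup in _validate_kwargs
ALLOWED: Dict[str, Set[str]] = {
    "map_url": {"search", "ignore_sitemap", "include_subdomains",
                "sitemap_only", "limit", "timeout"},
}

def validate_kwargs(kwargs: Dict[str, Any], method_name: str) -> None:
    if not kwargs:
        return  # nothing to check
    unknown = set(kwargs) - ALLOWED.get(method_name, set())
    if unknown:
        raise ValueError(
            f"Unsupported parameter(s) for {method_name}: {', '.join(sorted(unknown))}"
        )

validate_kwargs({"limit": 10}, "map_url")      # passes silently
try:
    validate_kwargs({"limt": 10}, "map_url")   # typo is rejected
except ValueError as err:
    print(err)  # Unsupported parameter(s) for map_url: limt
```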
```diff
@@ -2710,7 +2775,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def scrape_url(
             self,
             url: str,
-
+            *,
+            formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
             include_tags: Optional[List[str]] = None,
             exclude_tags: Optional[List[str]] = None,
             only_main_content: Optional[bool] = None,
@@ -2724,9 +2790,10 @@ class AsyncFirecrawlApp(FirecrawlApp):
             proxy: Optional[Literal["basic", "stealth"]] = None,
             extract: Optional[JsonConfig] = None,
             json_options: Optional[JsonConfig] = None,
-            actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
+            actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
+            **kwargs) -> ScrapeResponse[Any]:
         """
-        Scrape
+        Scrape a single URL asynchronously.

         Args:
             url (str): Target URL to scrape
@@ -2745,17 +2812,26 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
+            **kwargs: Additional parameters to pass to the API

         Returns:
-
-
-
-
-
+            ScrapeResponse with:
+            * success - Whether scrape was successful
+            * markdown - Markdown content if requested
+            * html - HTML content if requested
+            * rawHtml - Raw HTML content if requested
+            * links - Extracted links if requested
+            * screenshot - Screenshot if requested
+            * extract - Extracted data if requested
+            * json - JSON data if requested
+            * error - Error message if scrape failed

         Raises:
-
+            Exception: If scraping fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "scrape_url")
+
         headers = self._prepare_headers()

         # Build scrape parameters
```
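On the async client, the new bare `*` makes every option after `url` keyword-only, so positional option passing now raises a `TypeError`. A 2.2.0-style usage sketch (placeholder key and URL, and assuming `AsyncFirecrawlApp` is exported at the package top level like `FirecrawlApp`):

```python
import asyncio

from firecrawl import AsyncFirecrawlApp

async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key
    # app.scrape_url(url, ["markdown"]) would now raise a TypeError
    page = await app.scrape_url(
        "https://example.com",
        formats=["markdown", "links"],
        only_main_content=True,
    )
    print(page.markdown)  # markdown field per the ScrapeResponse docstring

asyncio.run(main())
```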
```diff
@@ -2879,6 +2955,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If batch scrape fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -3007,6 +3086,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If job initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -3126,6 +3208,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If crawl fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "crawl_url")
+
         crawl_params = {}

         # Add individual parameters
```