firecrawl 2.0.1__tar.gz → 2.1.0__tar.gz

This diff shows the changes between package versions as they were published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of firecrawl might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.0.1
+ Version: 2.1.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
  import logging
  import os
  
- from .firecrawl import FirecrawlApp, ExtractConfig # noqa
+ from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
  
- __version__ = "2.0.1"
+ __version__ = "2.1.0"
  
  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
11
11
  import logging
12
12
  import os
13
13
 
14
- from .firecrawl import FirecrawlApp, ExtractConfig # noqa
14
+ from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
15
15
 
16
- __version__ = "2.0.1"
16
+ __version__ = "2.1.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
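
The package root now exports JsonConfig and ScrapeOptions in place of ExtractConfig. A minimal migration sketch for 2.0.x code, assuming it imported ExtractConfig from the package root:

    # 2.0.1
    from firecrawl import FirecrawlApp, ExtractConfig

    # 2.1.0 -- ExtractConfig is renamed to JsonConfig; ScrapeOptions is newly exported
    from firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions
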
@@ -27,7 +27,7 @@ from pydantic import Field
  # Suppress Pydantic warnings about attribute shadowing
  warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
  warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
- warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"")
+ warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
  warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
  
  
@@ -84,7 +84,6 @@ T = TypeVar('T')
  # statusCode: Optional[int] = None
  # error: Optional[str] = None
  
-
  class AgentOptions(pydantic.BaseModel):
      """Configuration for the agent."""
      model: Literal["FIRE-1"] = "FIRE-1"
@@ -98,6 +97,16 @@ class ActionsResult(pydantic.BaseModel):
      """Result of actions performed during scraping."""
      screenshots: List[str]
  
+ class ChangeTrackingData(pydantic.BaseModel):
+     """
+     Data for the change tracking format.
+     """
+     previousScrapeAt: Optional[str] = None
+     changeStatus: str # "new" | "same" | "changed" | "removed"
+     visibility: str # "visible" | "hidden"
+     diff: Optional[Dict[str, Any]] = None
+     json: Optional[Any] = None
+
  class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
      """Document retrieved or processed by Firecrawl."""
      url: Optional[str] = None
@@ -112,6 +121,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
      actions: Optional[ActionsResult] = None
      title: Optional[str] = None # v1 search only
      description: Optional[str] = None # v1 search only
+     changeTracking: Optional[ChangeTrackingData] = None
  
  class LocationConfig(pydantic.BaseModel):
      """Location configuration for scraping."""
@@ -125,9 +135,9 @@ class WebhookConfig(pydantic.BaseModel):
      metadata: Optional[Dict[str, str]] = None
      events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None
  
- class CommonOptions(pydantic.BaseModel):
+ class ScrapeOptions(pydantic.BaseModel):
      """Parameters for scraping operations."""
-     formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None
+     formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
      headers: Optional[Dict[str, str]] = None
      includeTags: Optional[List[str]] = None
      excludeTags: Optional[List[str]] = None
@@ -187,17 +197,17 @@ class ExtractAgent(pydantic.BaseModel):
      """Configuration for the agent in extract operations."""
      model: Literal["FIRE-1"] = "FIRE-1"
  
- class ExtractConfig(pydantic.BaseModel):
+ class JsonConfig(pydantic.BaseModel):
      """Configuration for extraction."""
      prompt: Optional[str] = None
      schema: Optional[Any] = None
      systemPrompt: Optional[str] = None
      agent: Optional[ExtractAgent] = None
  
- class ScrapeParams(CommonOptions):
+ class ScrapeParams(ScrapeOptions):
      """Parameters for scraping operations."""
-     extract: Optional[ExtractConfig] = None
-     jsonOptions: Optional[ExtractConfig] = None
+     extract: Optional[JsonConfig] = None
+     jsonOptions: Optional[JsonConfig] = None
      actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
      agent: Optional[AgentOptions] = None
  
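
JsonConfig is a straight rename of ExtractConfig: the fields (prompt, schema, systemPrompt, agent) are unchanged, and ScrapeParams accepts it for both extract and jsonOptions. A sketch of JSON-mode scraping under the new names (key and URL are placeholders; passing the output of model_json_schema() mirrors the conversion the extract methods below perform):

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp, JsonConfig

    class Product(BaseModel):
        name: str
        price: float

    app = FirecrawlApp(api_key="fc-YOUR-KEY")
    doc = app.scrape_url(
        "https://example.com/product",
        formats=["json"],
        json_options=JsonConfig(schema=Product.model_json_schema()),
    )
    print(doc.json)
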
@@ -236,7 +246,7 @@ class CrawlParams(pydantic.BaseModel):
      allowBackwardLinks: Optional[bool] = None
      allowExternalLinks: Optional[bool] = None
      ignoreSitemap: Optional[bool] = None
-     scrapeOptions: Optional[CommonOptions] = None
+     scrapeOptions: Optional[ScrapeOptions] = None
      webhook: Optional[Union[str, WebhookConfig]] = None
      deduplicateSimilarURLs: Optional[bool] = None
      ignoreQueryParameters: Optional[bool] = None
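
CrawlParams.scrapeOptions, and the scrape_options keyword on the crawl methods below, now take the renamed ScrapeOptions model. A minimal sketch using only fields visible in this diff (key and URL are placeholders):

    from firecrawl import FirecrawlApp, ScrapeOptions

    app = FirecrawlApp(api_key="fc-YOUR-KEY")
    result = app.crawl_url(
        "https://example.com",
        ignore_sitemap=True,
        scrape_options=ScrapeOptions(formats=["markdown", "html"]),
    )
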
@@ -290,7 +300,7 @@ class ExtractParams(pydantic.BaseModel):
      includeSubdomains: Optional[bool] = None
      origin: Optional[str] = None
      showSources: Optional[bool] = None
-     scrapeOptions: Optional[CommonOptions] = None
+     scrapeOptions: Optional[ScrapeOptions] = None
  
  class ExtractResponse(pydantic.BaseModel, Generic[T]):
      """Response from extract operations."""
@@ -310,7 +320,7 @@ class SearchParams(pydantic.BaseModel):
      location: Optional[str] = None
      origin: Optional[str] = "api"
      timeout: Optional[int] = 60000
-     scrapeOptions: Optional[CommonOptions] = None
+     scrapeOptions: Optional[ScrapeOptions] = None
  
  class SearchResponse(pydantic.BaseModel):
      """Response from search operations."""
@@ -378,16 +388,6 @@ class GenerateLLMsTextStatusResponse(pydantic.BaseModel):
      status: Literal["processing", "completed", "failed"]
      error: Optional[str] = None
      expiresAt: str
-
- class ChangeTrackingData(pydantic.BaseModel):
-     """
-     Data for the change tracking format.
-     """
-     previousScrapeAt: Optional[str] = None
-     changeStatus: str # "new" | "same" | "changed" | "removed"
-     visibility: str # "visible" | "hidden"
-     diff: Optional[Dict[str, Any]] = None
-     json: Optional[Any] = None
  
  class SearchResponse(pydantic.BaseModel):
      """
@@ -443,7 +443,7 @@ class FirecrawlApp:
          self,
          url: str,
          *,
-         formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None,
+         formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
          include_tags: Optional[List[str]] = None,
          exclude_tags: Optional[List[str]] = None,
          only_main_content: Optional[bool] = None,
@@ -455,8 +455,8 @@ class FirecrawlApp:
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          **kwargs) -> ScrapeResponse[Any]:
          """
@@ -476,8 +476,8 @@ class FirecrawlApp:
              remove_base64_images (Optional[bool]): Remove base64 images
              block_ads (Optional[bool]): Block ads
              proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
-             extract (Optional[ExtractConfig]): Content extraction settings
-             json_options (Optional[ExtractConfig]): JSON extraction settings
+             extract (Optional[JsonConfig]): Content extraction settings
+             json_options (Optional[JsonConfig]): JSON extraction settings
              actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
  
  
@@ -569,7 +569,7 @@ class FirecrawlApp:
          country: Optional[str] = None,
          location: Optional[str] = None,
          timeout: Optional[int] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          params: Optional[Union[Dict[str, Any], SearchParams]] = None,
          **kwargs) -> SearchResponse:
          """
@@ -584,7 +584,7 @@ class FirecrawlApp:
              country (Optional[str]): Country code (default: "us")
              location (Optional[str]): Geo-targeting
              timeout (Optional[int]): Request timeout in milliseconds
-             scrape_options (Optional[CommonOptions]): Result scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Result scraping configuration
              params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
              **kwargs: Additional keyword arguments for future compatibility
  
@@ -665,7 +665,7 @@ class FirecrawlApp:
          allow_backward_links: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          webhook: Optional[Union[str, WebhookConfig]] = None,
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
@@ -687,7 +687,7 @@ class FirecrawlApp:
              allow_backward_links (Optional[bool]): Follow parent directory links
              allow_external_links (Optional[bool]): Follow external domain links
              ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
-             scrape_options (Optional[CommonOptions]): Page scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
              webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
              deduplicate_similar_urls (Optional[bool]): Remove similar URLs
              ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -769,7 +769,7 @@ class FirecrawlApp:
          allow_backward_links: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          webhook: Optional[Union[str, WebhookConfig]] = None,
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
@@ -790,7 +790,7 @@ class FirecrawlApp:
              allow_backward_links (Optional[bool]): Follow parent directory links
              allow_external_links (Optional[bool]): Follow external domain links
              ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
-             scrape_options (Optional[CommonOptions]): Page scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
              webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
              deduplicate_similar_urls (Optional[bool]): Remove similar URLs
              ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1008,7 +1008,7 @@ class FirecrawlApp:
          allow_backward_links: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          webhook: Optional[Union[str, WebhookConfig]] = None,
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
@@ -1029,7 +1029,7 @@ class FirecrawlApp:
              allow_backward_links (Optional[bool]): Follow parent directory links
              allow_external_links (Optional[bool]): Follow external domain links
              ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
-             scrape_options (Optional[CommonOptions]): Page scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
              webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
              deduplicate_similar_urls (Optional[bool]): Remove similar URLs
              ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1162,8 +1162,8 @@ class FirecrawlApp:
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          poll_interval: Optional[int] = 2,
@@ -1188,8 +1188,8 @@ class FirecrawlApp:
              remove_base64_images (Optional[bool]): Remove base64 encoded images
              block_ads (Optional[bool]): Block advertisements
              proxy (Optional[Literal]): Proxy type to use
-             extract (Optional[ExtractConfig]): Content extraction config
-             json_options (Optional[ExtractConfig]): JSON extraction config
+             extract (Optional[JsonConfig]): Content extraction config
+             json_options (Optional[JsonConfig]): JSON extraction config
              actions (Optional[List[Union]]): Actions to perform
              agent (Optional[AgentOptions]): Agent configuration
              poll_interval (Optional[int]): Seconds between status checks (default: 2)
@@ -1286,8 +1286,8 @@ class FirecrawlApp:
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          idempotency_key: Optional[str] = None,
@@ -1311,8 +1311,8 @@ class FirecrawlApp:
              remove_base64_images (Optional[bool]): Remove base64 encoded images
              block_ads (Optional[bool]): Block advertisements
              proxy (Optional[Literal]): Proxy type to use
-             extract (Optional[ExtractConfig]): Content extraction config
-             json_options (Optional[ExtractConfig]): JSON extraction config
+             extract (Optional[JsonConfig]): Content extraction config
+             json_options (Optional[JsonConfig]): JSON extraction config
              actions (Optional[List[Union]]): Actions to perform
              agent (Optional[AgentOptions]): Agent configuration
              idempotency_key (Optional[str]): Unique key to prevent duplicate requests
@@ -1408,8 +1408,8 @@ class FirecrawlApp:
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          idempotency_key: Optional[str] = None,
@@ -1433,8 +1433,8 @@ class FirecrawlApp:
              remove_base64_images (Optional[bool]): Remove base64 encoded images
              block_ads (Optional[bool]): Block advertisements
              proxy (Optional[Literal]): Proxy type to use
-             extract (Optional[ExtractConfig]): Content extraction config
-             json_options (Optional[ExtractConfig]): JSON extraction config
+             extract (Optional[JsonConfig]): Content extraction config
+             json_options (Optional[JsonConfig]): JSON extraction config
              actions (Optional[List[Union]]): Actions to perform
              agent (Optional[AgentOptions]): Agent configuration
              idempotency_key (Optional[str]): Unique key to prevent duplicate requests
@@ -1742,7 +1742,7 @@ class FirecrawlApp:
      def async_extract(
          self,
-         urls: List[str],
+         urls: Optional[List[str]] = None,
          *,
          prompt: Optional[str] = None,
          schema: Optional[Any] = None,
@@ -1750,8 +1750,7 @@ class FirecrawlApp:
          allow_external_links: Optional[bool] = False,
          enable_web_search: Optional[bool] = False,
          show_sources: Optional[bool] = False,
-         agent: Optional[Dict[str, Any]] = None,
-         idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+         agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
          """
          Initiate an asynchronous extract job.
  
@@ -1775,7 +1774,7 @@ class FirecrawlApp:
          Raises:
              ValueError: If job initiation fails
          """
-         headers = self._prepare_headers(idempotency_key)
+         headers = self._prepare_headers()
  
          schema = schema
          if schema:
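
Two behavioral changes land in async_extract: urls becomes optional, so a job can be driven by a prompt alone, and the idempotency_key parameter is dropped, so headers are prepared without it. A hedged sketch of a prompt-only job under the new signature (key and prompt are placeholders):

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-KEY")
    job = app.async_extract(
        prompt="List the plan names on the pricing page"  # urls may now be omitted
    )
    # job is an ExtractResponse describing the queued extract job
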
@@ -2707,8 +2706,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]:
          """
          Scrape and extract content from a URL asynchronously.
@@ -2727,8 +2726,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
              remove_base64_images (Optional[bool]): Remove base64 images
              block_ads (Optional[bool]): Block ads
              proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
-             extract (Optional[ExtractConfig]): Content extraction settings
-             json_options (Optional[ExtractConfig]): JSON extraction settings
+             extract (Optional[JsonConfig]): Content extraction settings
+             json_options (Optional[JsonConfig]): JSON extraction settings
              actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
  
          Returns:
@@ -2821,8 +2820,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          poll_interval: Optional[int] = 2,
@@ -2847,8 +2846,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
              remove_base64_images (Optional[bool]): Remove base64 encoded images
              block_ads (Optional[bool]): Block advertisements
              proxy (Optional[Literal]): Proxy type to use
-             extract (Optional[ExtractConfig]): Content extraction config
-             json_options (Optional[ExtractConfig]): JSON extraction config
+             extract (Optional[JsonConfig]): Content extraction config
+             json_options (Optional[JsonConfig]): JSON extraction config
              actions (Optional[List[Union]]): Actions to perform
              agent (Optional[AgentOptions]): Agent configuration
              poll_interval (Optional[int]): Seconds between status checks (default: 2)
@@ -2923,9 +2922,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
              headers
          )
  
-         if response.status_code == 200:
+         if response.get('success'):
              try:
-                 id = response.json().get('id')
+                 id = response.get('id')
              except:
                  raise Exception(f'Failed to parse Firecrawl response as JSON.')
              return self._monitor_job_status(id, headers, poll_interval)
@@ -2950,8 +2949,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
          remove_base64_images: Optional[bool] = None,
          block_ads: Optional[bool] = None,
          proxy: Optional[Literal["basic", "stealth"]] = None,
-         extract: Optional[ExtractConfig] = None,
-         json_options: Optional[ExtractConfig] = None,
+         extract: Optional[JsonConfig] = None,
+         json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
          agent: Optional[AgentOptions] = None,
          idempotency_key: Optional[str] = None,
@@ -2975,8 +2974,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
              remove_base64_images (Optional[bool]): Remove base64 encoded images
              block_ads (Optional[bool]): Block advertisements
              proxy (Optional[Literal]): Proxy type to use
-             extract (Optional[ExtractConfig]): Content extraction config
-             json_options (Optional[ExtractConfig]): JSON extraction config
+             extract (Optional[JsonConfig]): Content extraction config
+             json_options (Optional[JsonConfig]): JSON extraction config
              actions (Optional[List[Union]]): Actions to perform
              agent (Optional[AgentOptions]): Agent configuration
              idempotency_key (Optional[str]): Unique key to prevent duplicate requests
@@ -3051,7 +3050,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              headers
          )
  
-         if response.status_code == 200:
+         if response.get('status_code') == 200:
              try:
                  return BatchScrapeResponse(**response.json())
              except:
@@ -3060,7 +3059,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              self._handle_error(response, 'start batch scrape job')
  
      async def crawl_url(
-         self,
+         self,
          url: str,
          *,
          include_paths: Optional[List[str]] = None,
@@ -3071,7 +3070,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          allow_backward_links: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          webhook: Optional[Union[str, WebhookConfig]] = None,
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
@@ -3093,7 +3092,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              allow_backward_links (Optional[bool]): Follow parent directory links
              allow_external_links (Optional[bool]): Follow external domain links
              ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
-             scrape_options (Optional[CommonOptions]): Page scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
              webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
              deduplicate_similar_urls (Optional[bool]): Remove similar URLs
              ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -3149,15 +3148,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
          params_dict = final_params.dict(exclude_none=True)
          params_dict['url'] = url
          params_dict['origin'] = f"python-sdk@{version}"
-
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = await self._async_post_request(
              f'{self.api_url}/v1/crawl', params_dict, headers)
  
-         if response.status_code == 200:
+         print(response)
+         if response.get('success'):
              try:
-                 id = response.json().get('id')
+                 id = response.get('id')
              except:
                  raise Exception(f'Failed to parse Firecrawl response as JSON.')
              return self._monitor_job_status(id, headers, poll_interval)
@@ -3177,11 +3176,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
          allow_backward_links: Optional[bool] = None,
          allow_external_links: Optional[bool] = None,
          ignore_sitemap: Optional[bool] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          webhook: Optional[Union[str, WebhookConfig]] = None,
          deduplicate_similar_urls: Optional[bool] = None,
          ignore_query_parameters: Optional[bool] = None,
          regex_on_full_url: Optional[bool] = None,
+         poll_interval: Optional[int] = 2,
          idempotency_key: Optional[str] = None,
          **kwargs
      ) -> CrawlResponse:
@@ -3198,7 +3198,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              allow_backward_links (Optional[bool]): Follow parent directory links
              allow_external_links (Optional[bool]): Follow external domain links
              ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
-             scrape_options (Optional[CommonOptions]): Page scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Page scraping configuration
              webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
              deduplicate_similar_urls (Optional[bool]): Remove similar URLs
              ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -3263,9 +3263,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
              headers
          )
  
-         if response.status_code == 200:
+         if response.get('success'):
              try:
-                 return CrawlResponse(**response.json())
+                 return CrawlResponse(**response)
              except:
                  raise Exception(f'Failed to parse Firecrawl response as JSON.')
          else:
@@ -3304,7 +3304,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              headers
          )
  
-         if status_data['status'] == 'completed':
+         if status_data.get('status') == 'completed':
              if 'data' in status_data:
                  data = status_data['data']
                  while 'next' in status_data:
@@ -3318,26 +3318,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
                      data.extend(next_data.get('data', []))
                      status_data = next_data
                  status_data['data'] = data
-
-         response = {
-             'status': status_data.get('status'),
-             'total': status_data.get('total'),
-             'completed': status_data.get('completed'),
-             'creditsUsed': status_data.get('creditsUsed'),
-             'expiresAt': status_data.get('expiresAt'),
-             'data': status_data.get('data')
-         }
+         # Create CrawlStatusResponse object from status data
+         response = CrawlStatusResponse(
+             status=status_data.get('status'),
+             total=status_data.get('total'),
+             completed=status_data.get('completed'),
+             creditsUsed=status_data.get('creditsUsed'),
+             expiresAt=status_data.get('expiresAt'),
+             data=status_data.get('data'),
+             success=False if 'error' in status_data else True
+         )
  
          if 'error' in status_data:
-             response['error'] = status_data['error']
+             response.error = status_data.get('error')
  
          if 'next' in status_data:
-             response['next'] = status_data['next']
+             response.next = status_data.get('next')
  
-         return {
-             'success': False if 'error' in status_data else True,
-             **response
-         }
+         return response
  
      async def _async_monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int = 2) -> CrawlStatusResponse:
          """
@@ -3360,7 +3358,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              headers
          )
  
-         if status_data['status'] == 'completed':
+         if status_data.get('status') == 'completed':
              if 'data' in status_data:
                  data = status_data['data']
                  while 'next' in status_data:
@@ -3377,15 +3375,22 @@ class AsyncFirecrawlApp(FirecrawlApp):
                      return status_data
                  else:
                      raise Exception('Job completed but no data was returned')
-         elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
+         elif status_data.get('status') in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
              await asyncio.sleep(max(poll_interval, 2))
          else:
              raise Exception(f'Job failed or was stopped. Status: {status_data["status"]}')
  
      async def map_url(
-         self,
-         url: str,
-         params: Optional[MapParams] = None) -> MapResponse:
+         self,
+         url: str,
+         *,
+         search: Optional[str] = None,
+         ignore_sitemap: Optional[bool] = None,
+         include_subdomains: Optional[bool] = None,
+         sitemap_only: Optional[bool] = None,
+         limit: Optional[int] = None,
+         timeout: Optional[int] = None,
+         params: Optional[MapParams] = None) -> MapResponse:
          """
          Asynchronously map and discover links from a URL.
  
@@ -3410,21 +3415,40 @@ class AsyncFirecrawlApp(FirecrawlApp):
          Raises:
              Exception: If mapping fails
          """
-         headers = self._prepare_headers()
-         json_data = {'url': url}
+         map_params = {}
          if params:
-             json_data.update(params)
-         json_data['origin'] = f"python-sdk@{version}"
+             map_params.update(params.dict(exclude_none=True))
+
+         # Add individual parameters
+         if search is not None:
+             map_params['search'] = search
+         if ignore_sitemap is not None:
+             map_params['ignoreSitemap'] = ignore_sitemap
+         if include_subdomains is not None:
+             map_params['includeSubdomains'] = include_subdomains
+         if sitemap_only is not None:
+             map_params['sitemapOnly'] = sitemap_only
+         if limit is not None:
+             map_params['limit'] = limit
+         if timeout is not None:
+             map_params['timeout'] = timeout
+
+         # Create final params object
+         final_params = MapParams(**map_params)
+         params_dict = final_params.dict(exclude_none=True)
+         params_dict['url'] = url
+         params_dict['origin'] = f"python-sdk@{version}"
  
+         # Make request
          endpoint = f'/v1/map'
          response = await self._async_post_request(
              f'{self.api_url}{endpoint}',
-             json_data,
-             headers
+             params_dict,
+             headers={"Authorization": f"Bearer {self.api_key}"}
          )
  
          if response.get('success') and 'links' in response:
-             return response
+             return MapResponse(**response)
          elif 'error' in response:
              raise Exception(f'Failed to map URL. Error: {response["error"]}')
          else:
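
The async map_url moves from a single params object to keyword arguments; a MapParams object is still accepted and merged first, with individual keywords applied on top. A hedged usage sketch (the import path is assumed, since only FirecrawlApp, JsonConfig and ScrapeOptions are re-exported at the package root; key and URL are placeholders):

    import asyncio
    from firecrawl.firecrawl import AsyncFirecrawlApp

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")
        res = await app.map_url(
            "https://example.com",
            search="docs",
            include_subdomains=True,
            limit=50,
        )
        print(res.links)  # links field assumed from the MapResponse success check

    asyncio.run(main())
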
@@ -3432,27 +3456,28 @@ class AsyncFirecrawlApp(FirecrawlApp):
      async def extract(
          self,
-         urls: List[str],
-         params: Optional[ExtractParams] = None) -> ExtractResponse[Any]:
+         urls: Optional[List[str]] = None,
+         *,
+         prompt: Optional[str] = None,
+         schema: Optional[Any] = None,
+         system_prompt: Optional[str] = None,
+         allow_external_links: Optional[bool] = False,
+         enable_web_search: Optional[bool] = False,
+         show_sources: Optional[bool] = False,
+         agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
+
          """
          Asynchronously extract structured information from URLs.
  
          Args:
-             urls (List[str]): URLs to extract from
-             params (Optional[ExtractParams]): See ExtractParams model:
-                 Extraction Config:
-                     * prompt - Custom extraction prompt
-                     * schema - JSON schema/Pydantic model
-                     * systemPrompt - System context
-
-                 Behavior Options:
-                     * allowExternalLinks - Follow external links
-                     * enableWebSearch - Enable web search
-                     * includeSubdomains - Include subdomains
-                     * showSources - Include source URLs
-
-                 Scraping Options:
-                     * scrapeOptions - Page scraping config
+             urls (Optional[List[str]]): URLs to extract from
+             prompt (Optional[str]): Custom extraction prompt
+             schema (Optional[Any]): JSON schema/Pydantic model
+             system_prompt (Optional[str]): System context
+             allow_external_links (Optional[bool]): Follow external links
+             enable_web_search (Optional[bool]): Enable web search
+             show_sources (Optional[bool]): Include source URLs
+             agent (Optional[Dict[str, Any]]): Agent configuration
  
          Returns:
              ExtractResponse with:
@@ -3465,29 +3490,35 @@ class AsyncFirecrawlApp(FirecrawlApp):
          """
          headers = self._prepare_headers()
  
-         if not params or (not params.get('prompt') and not params.get('schema')):
+         if not prompt and not schema:
              raise ValueError("Either prompt or schema is required")
  
-         schema = params.get('schema')
+         if not urls and not prompt:
+             raise ValueError("Either urls or prompt is required")
+
          if schema:
              if hasattr(schema, 'model_json_schema'):
+                 # Convert Pydantic model to JSON schema
                  schema = schema.model_json_schema()
+             # Otherwise assume it's already a JSON schema dict
  
          request_data = {
-             'urls': urls,
-             'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
-             'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
-             'showSources': params.get('show_sources', params.get('showSources', False)),
+             'urls': urls or [],
+             'allowExternalLinks': allow_external_links,
+             'enableWebSearch': enable_web_search,
+             'showSources': show_sources,
              'schema': schema,
-             'origin': f'python-sdk@{version}'
+             'origin': f'python-sdk@{get_version()}'
          }
  
-         if params.get('prompt'):
-             request_data['prompt'] = params['prompt']
-         if params.get('system_prompt'):
-             request_data['systemPrompt'] = params['system_prompt']
-         elif params.get('systemPrompt'):
-             request_data['systemPrompt'] = params['systemPrompt']
+         # Only add prompt and systemPrompt if they exist
+         if prompt:
+             request_data['prompt'] = prompt
+         if system_prompt:
+             request_data['systemPrompt'] = system_prompt
+
+         if agent:
+             request_data['agent'] = agent
  
          response = await self._async_post_request(
              f'{self.api_url}/v1/extract',
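
The async extract() gets the same keyword-first signature, a new guard that either urls or prompt must be supplied, and automatic conversion of Pydantic models via model_json_schema(). A hedged sketch (import path, key and URL are placeholders or assumptions, as above):

    import asyncio
    from pydantic import BaseModel
    from firecrawl.firecrawl import AsyncFirecrawlApp

    class Company(BaseModel):
        name: str
        mission: str

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")
        res = await app.extract(
            urls=["https://example.com"],
            prompt="Extract the company name and mission",
            schema=Company,  # converted to a JSON schema by the SDK
        )
        print(res.data)  # data field assumed from ExtractResponse

    asyncio.run(main())
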
@@ -3507,7 +3538,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          )
  
          if status_data['status'] == 'completed':
-             return status_data
+             return ExtractResponse(**status_data)
          elif status_data['status'] in ['failed', 'cancelled']:
              raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')
  
@@ -3563,14 +3594,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
                      status_data = next_data
                  status_data['data'] = data
  
-         response = {
-             'status': status_data.get('status'),
-             'total': status_data.get('total'),
-             'completed': status_data.get('completed'),
-             'creditsUsed': status_data.get('creditsUsed'),
-             'expiresAt': status_data.get('expiresAt'),
-             'data': status_data.get('data')
-         }
+         response = BatchScrapeStatusResponse(
+             status=status_data.get('status'),
+             total=status_data.get('total'),
+             completed=status_data.get('completed'),
+             creditsUsed=status_data.get('creditsUsed'),
+             expiresAt=status_data.get('expiresAt'),
+             data=status_data.get('data')
+         )
  
          if 'error' in status_data:
              response['error'] = status_data['error']
@@ -3690,8 +3721,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          allow_external_links: Optional[bool] = False,
          enable_web_search: Optional[bool] = False,
          show_sources: Optional[bool] = False,
-         agent: Optional[Dict[str, Any]] = None,
-         idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+         agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
          """
          Initiate an asynchronous extraction job without waiting for completion.
  
@@ -3715,7 +3745,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          Raises:
              ValueError: If job initiation fails
          """
-         headers = self._prepare_headers(idempotency_key)
+         headers = self._prepare_headers()
  
          if not prompt and not schema:
              raise ValueError("Either prompt or schema is required")
@@ -3727,14 +3757,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
              if hasattr(schema, 'model_json_schema'):
                  schema = schema.model_json_schema()
  
-         request_data = {
-             'urls': urls or [],
-             'allowExternalLinks': allow_external_links,
-             'enableWebSearch': enable_web_search,
-             'showSources': show_sources,
-             'schema': schema,
-             'origin': f'python-sdk@{version}'
-         }
+         request_data = ExtractResponse(
+             urls=urls or [],
+             allowExternalLinks=allow_external_links,
+             enableWebSearch=enable_web_search,
+             showSources=show_sources,
+             schema=schema,
+             origin=f'python-sdk@{version}'
+         )
  
          if prompt:
              request_data['prompt'] = prompt
@@ -3811,7 +3841,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  
          await asyncio.sleep(2)
  
-         return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'}
+         return GenerateLLMsTextStatusResponse(success=False, error='LLMs.txt generation job terminated unexpectedly')
  
      async def async_generate_llms_text(
          self,
@@ -3846,6 +3876,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
          if experimental_stream is not None:
              params['__experimental_stream'] = experimental_stream
  
+         params = GenerateLLMsTextParams(
+             maxUrls=max_urls,
+             showFullText=show_full_text,
+             __experimental_stream=experimental_stream
+         )
+
          headers = self._prepare_headers()
          json_data = {'url': url, **params.dict(exclude_none=True)}
          json_data['origin'] = f"python-sdk@{version}"
@@ -3982,7 +4018,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  
          await asyncio.sleep(2)
  
-         return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
+         return DeepResearchStatusResponse(success=False, error='Deep research job terminated unexpectedly')
  
      async def async_deep_research(
          self,
@@ -4089,7 +4125,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
          country: Optional[str] = None,
          location: Optional[str] = None,
          timeout: Optional[int] = None,
-         scrape_options: Optional[CommonOptions] = None,
+         scrape_options: Optional[ScrapeOptions] = None,
          params: Optional[Union[Dict[str, Any], SearchParams]] = None,
          **kwargs) -> SearchResponse:
          """
@@ -4104,7 +4140,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              country (Optional[str]): Country code (default: "us")
              location (Optional[str]): Geo-targeting
              timeout (Optional[int]): Request timeout in milliseconds
-             scrape_options (Optional[CommonOptions]): Result scraping configuration
+             scrape_options (Optional[ScrapeOptions]): Result scraping configuration
              params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
              **kwargs: Additional keyword arguments for future compatibility
  
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.0.1
+ Version: 2.1.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
All other files in the package are unchanged.