firecrawl 2.0.2__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl might be problematic.

firecrawl/__init__.py CHANGED
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
  import logging
  import os

- from .firecrawl import FirecrawlApp, JsonConfig # noqa
+ from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa

- __version__ = "2.0.2"
+ __version__ = "2.1.1"

  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
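With 2.1.1, the renamed ScrapeOptions model (defined in firecrawl/firecrawl.py below) is re-exported from the package root alongside FirecrawlApp and JsonConfig. A minimal, illustrative sketch of the new import; the API key value is a placeholder and the constructor keyword is assumed from the SDK's usual usage:

    # Illustrative only; assumes a valid Firecrawl API key.
    from firecrawl import FirecrawlApp, ScrapeOptions

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
    # Fields shown in the ScrapeOptions hunk below.
    options = ScrapeOptions(formats=["markdown"], excludeTags=["nav", "footer"])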
firecrawl/firecrawl.py CHANGED
@@ -97,6 +97,16 @@ class ActionsResult(pydantic.BaseModel):
  """Result of actions performed during scraping."""
  screenshots: List[str]

+ class ChangeTrackingData(pydantic.BaseModel):
+ """
+ Data for the change tracking format.
+ """
+ previousScrapeAt: Optional[str] = None
+ changeStatus: str # "new" | "same" | "changed" | "removed"
+ visibility: str # "visible" | "hidden"
+ diff: Optional[Dict[str, Any]] = None
+ json: Optional[Any] = None
+
  class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
  """Document retrieved or processed by Firecrawl."""
  url: Optional[str] = None
@@ -111,6 +121,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
  actions: Optional[ActionsResult] = None
  title: Optional[str] = None # v1 search only
  description: Optional[str] = None # v1 search only
+ changeTracking: Optional[ChangeTrackingData] = None

  class LocationConfig(pydantic.BaseModel):
  """Location configuration for scraping."""
@@ -124,9 +135,9 @@ class WebhookConfig(pydantic.BaseModel):
  metadata: Optional[Dict[str, str]] = None
  events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None

- class CommonOptions(pydantic.BaseModel):
+ class ScrapeOptions(pydantic.BaseModel):
  """Parameters for scraping operations."""
- formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None
+ formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
  headers: Optional[Dict[str, str]] = None
  includeTags: Optional[List[str]] = None
  excludeTags: Optional[List[str]] = None
@@ -193,7 +204,7 @@ class JsonConfig(pydantic.BaseModel):
  systemPrompt: Optional[str] = None
  agent: Optional[ExtractAgent] = None

- class ScrapeParams(CommonOptions):
+ class ScrapeParams(ScrapeOptions):
  """Parameters for scraping operations."""
  extract: Optional[JsonConfig] = None
  jsonOptions: Optional[JsonConfig] = None
@@ -235,7 +246,7 @@ class CrawlParams(pydantic.BaseModel):
  allowBackwardLinks: Optional[bool] = None
  allowExternalLinks: Optional[bool] = None
  ignoreSitemap: Optional[bool] = None
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None
  webhook: Optional[Union[str, WebhookConfig]] = None
  deduplicateSimilarURLs: Optional[bool] = None
  ignoreQueryParameters: Optional[bool] = None
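CrawlParams.scrapeOptions is now typed as the renamed ScrapeOptions model. A hedged sketch of passing it to crawl_url, using only keyword names that appear in the signature hunks later in this diff:

    # Sketch only; crawl_url keyword names are taken from the signatures below.
    from firecrawl import FirecrawlApp, ScrapeOptions

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
    result = app.crawl_url(
        "https://example.com",
        ignore_sitemap=True,
        scrape_options=ScrapeOptions(formats=["markdown", "html"]),
    )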
@@ -289,7 +300,7 @@ class ExtractParams(pydantic.BaseModel):
  includeSubdomains: Optional[bool] = None
  origin: Optional[str] = None
  showSources: Optional[bool] = None
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None

  class ExtractResponse(pydantic.BaseModel, Generic[T]):
  """Response from extract operations."""
@@ -309,7 +320,7 @@ class SearchParams(pydantic.BaseModel):
  location: Optional[str] = None
  origin: Optional[str] = "api"
  timeout: Optional[int] = 60000
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None

  class SearchResponse(pydantic.BaseModel):
  """Response from search operations."""
@@ -377,16 +388,6 @@ class GenerateLLMsTextStatusResponse(pydantic.BaseModel):
  status: Literal["processing", "completed", "failed"]
  error: Optional[str] = None
  expiresAt: str
-
- class ChangeTrackingData(pydantic.BaseModel):
- """
- Data for the change tracking format.
- """
- previousScrapeAt: Optional[str] = None
- changeStatus: str # "new" | "same" | "changed" | "removed"
- visibility: str # "visible" | "hidden"
- diff: Optional[Dict[str, Any]] = None
- json: Optional[Any] = None

  class SearchResponse(pydantic.BaseModel):
  """
@@ -442,7 +443,7 @@ class FirecrawlApp:
  self,
  url: str,
  *,
- formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None,
+ formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
  include_tags: Optional[List[str]] = None,
  exclude_tags: Optional[List[str]] = None,
  only_main_content: Optional[bool] = None,
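scrape_url now accepts "changeTracking" in formats, and the resulting document carries the ChangeTrackingData model defined earlier in this diff. A hedged sketch, assuming scrape_url returns a FirecrawlDocument-shaped result as the models above suggest:

    # Sketch of the new changeTracking format; field names come from ChangeTrackingData.
    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
    doc = app.scrape_url("https://example.com", formats=["markdown", "changeTracking"])

    tracking = doc.changeTracking  # Optional[ChangeTrackingData]
    if tracking is not None:
        print(tracking.changeStatus)      # "new" | "same" | "changed" | "removed"
        print(tracking.visibility)        # "visible" | "hidden"
        print(tracking.previousScrapeAt)  # timestamp of the previous scrape, if any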
@@ -568,7 +569,7 @@ class FirecrawlApp:
  country: Optional[str] = None,
  location: Optional[str] = None,
  timeout: Optional[int] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  params: Optional[Union[Dict[str, Any], SearchParams]] = None,
  **kwargs) -> SearchResponse:
  """
@@ -583,7 +584,7 @@ class FirecrawlApp:
  country (Optional[str]): Country code (default: "us")
  location (Optional[str]): Geo-targeting
  timeout (Optional[int]): Request timeout in milliseconds
- scrape_options (Optional[CommonOptions]): Result scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Result scraping configuration
  params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
  **kwargs: Additional keyword arguments for future compatibility

@@ -664,7 +665,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -686,7 +687,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -768,7 +769,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -789,7 +790,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1007,7 +1008,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -1028,7 +1029,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1741,7 +1742,7 @@ class FirecrawlApp:

  def async_extract(
  self,
- urls: List[str],
+ urls: Optional[List[str]] = None,
  *,
  prompt: Optional[str] = None,
  schema: Optional[Any] = None,
@@ -1749,8 +1750,7 @@ class FirecrawlApp:
  allow_external_links: Optional[bool] = False,
  enable_web_search: Optional[bool] = False,
  show_sources: Optional[bool] = False,
- agent: Optional[Dict[str, Any]] = None,
- idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
  """
  Initiate an asynchronous extract job.

@@ -1774,7 +1774,7 @@ class FirecrawlApp:
  Raises:
  ValueError: If job initiation fails
  """
- headers = self._prepare_headers(idempotency_key)
+ headers = self._prepare_headers()

  schema = schema
  if schema:
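Taken together, the async_extract hunks above make urls optional and drop idempotency_key from both the signature and the _prepare_headers call. A sketch of the updated call, with placeholder values:

    # Sketch of the 2.1.1 async_extract signature shown above.
    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
    job = app.async_extract(
        urls=["https://example.com"],
        prompt="List the product names on this page",
    )
    # urls is now Optional, so a prompt-only job is also expressible,
    # subject to the SDK's own validation.
    job_without_urls = app.async_extract(prompt="Find the pricing page of example.com")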
@@ -2922,9 +2922,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('success'):
  try:
- id = response.json().get('id')
+ id = response.get('id')
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  return self._monitor_job_status(id, headers, poll_interval)
@@ -3050,7 +3050,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('status_code') == 200:
  try:
  return BatchScrapeResponse(**response.json())
  except:
@@ -3059,7 +3059,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  self._handle_error(response, 'start batch scrape job')

  async def crawl_url(
- self,
+ self,
  url: str,
  *,
  include_paths: Optional[List[str]] = None,
@@ -3070,7 +3070,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -3092,7 +3092,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
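The async client gets the same ScrapeOptions rename. A minimal asyncio sketch of AsyncFirecrawlApp.crawl_url using keyword arguments from the signature above; the fields read from the result follow the CrawlStatusResponse construction later in this diff and are assumptions beyond that:

    # Sketch only; AsyncFirecrawlApp and ScrapeOptions live in firecrawl/firecrawl.py per this diff.
    import asyncio
    from firecrawl.firecrawl import AsyncFirecrawlApp, ScrapeOptions

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")
        result = await app.crawl_url(
            "https://example.com",
            include_paths=["/blog/*"],
            scrape_options=ScrapeOptions(formats=["markdown"]),
        )
        print(result.status, f"{result.completed}/{result.total}")

    asyncio.run(main())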
@@ -3148,15 +3148,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
  params_dict = final_params.dict(exclude_none=True)
  params_dict['url'] = url
  params_dict['origin'] = f"python-sdk@{version}"
-
  # Make request
  headers = self._prepare_headers(idempotency_key)
  response = await self._async_post_request(
  f'{self.api_url}/v1/crawl', params_dict, headers)

- if response.status_code == 200:
+ if response.get('success'):
  try:
- id = response.json().get('id')
+ id = response.get('id')
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  return self._monitor_job_status(id, headers, poll_interval)
@@ -3176,11 +3175,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
  regex_on_full_url: Optional[bool] = None,
+ poll_interval: Optional[int] = 2,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> CrawlResponse:
@@ -3197,7 +3197,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -3262,9 +3262,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('success'):
  try:
- return CrawlResponse(**response.json())
+ return CrawlResponse(**response)
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  else:
@@ -3303,7 +3303,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if status_data['status'] == 'completed':
+ if status_data.get('status') == 'completed':
  if 'data' in status_data:
  data = status_data['data']
  while 'next' in status_data:
@@ -3317,26 +3317,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
  data.extend(next_data.get('data', []))
  status_data = next_data
  status_data['data'] = data
-
- response = {
- 'status': status_data.get('status'),
- 'total': status_data.get('total'),
- 'completed': status_data.get('completed'),
- 'creditsUsed': status_data.get('creditsUsed'),
- 'expiresAt': status_data.get('expiresAt'),
- 'data': status_data.get('data')
- }
+ # Create CrawlStatusResponse object from status data
+ response = CrawlStatusResponse(
+ status=status_data.get('status'),
+ total=status_data.get('total'),
+ completed=status_data.get('completed'),
+ creditsUsed=status_data.get('creditsUsed'),
+ expiresAt=status_data.get('expiresAt'),
+ data=status_data.get('data'),
+ success=False if 'error' in status_data else True
+ )

  if 'error' in status_data:
- response['error'] = status_data['error']
+ response.error = status_data.get('error')

  if 'next' in status_data:
- response['next'] = status_data['next']
+ response.next = status_data.get('next')

- return {
- 'success': False if 'error' in status_data else True,
- **response
- }
+ return response

  async def _async_monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int = 2) -> CrawlStatusResponse:
  """
@@ -3359,7 +3357,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if status_data['status'] == 'completed':
+ if status_data.get('status') == 'completed':
  if 'data' in status_data:
  data = status_data['data']
  while 'next' in status_data:
@@ -3376,15 +3374,22 @@ class AsyncFirecrawlApp(FirecrawlApp):
  return status_data
  else:
  raise Exception('Job completed but no data was returned')
- elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
+ elif status_data.get('status') in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
  await asyncio.sleep(max(poll_interval, 2))
  else:
  raise Exception(f'Job failed or was stopped. Status: {status_data["status"]}')

  async def map_url(
- self,
- url: str,
- params: Optional[MapParams] = None) -> MapResponse:
+ self,
+ url: str,
+ *,
+ search: Optional[str] = None,
+ ignore_sitemap: Optional[bool] = None,
+ include_subdomains: Optional[bool] = None,
+ sitemap_only: Optional[bool] = None,
+ limit: Optional[int] = None,
+ timeout: Optional[int] = None,
+ params: Optional[MapParams] = None) -> MapResponse:
  """
  Asynchronously map and discover links from a URL.

@@ -3409,21 +3414,40 @@ class AsyncFirecrawlApp(FirecrawlApp):
  Raises:
  Exception: If mapping fails
  """
- headers = self._prepare_headers()
- json_data = {'url': url}
+ map_params = {}
  if params:
- json_data.update(params)
- json_data['origin'] = f"python-sdk@{version}"
+ map_params.update(params.dict(exclude_none=True))

+ # Add individual parameters
+ if search is not None:
+ map_params['search'] = search
+ if ignore_sitemap is not None:
+ map_params['ignoreSitemap'] = ignore_sitemap
+ if include_subdomains is not None:
+ map_params['includeSubdomains'] = include_subdomains
+ if sitemap_only is not None:
+ map_params['sitemapOnly'] = sitemap_only
+ if limit is not None:
+ map_params['limit'] = limit
+ if timeout is not None:
+ map_params['timeout'] = timeout
+
+ # Create final params object
+ final_params = MapParams(**map_params)
+ params_dict = final_params.dict(exclude_none=True)
+ params_dict['url'] = url
+ params_dict['origin'] = f"python-sdk@{version}"
+
+ # Make request
  endpoint = f'/v1/map'
  response = await self._async_post_request(
  f'{self.api_url}{endpoint}',
- json_data,
- headers
+ params_dict,
+ headers={"Authorization": f"Bearer {self.api_key}"}
  )

  if response.get('success') and 'links' in response:
- return response
+ return MapResponse(**response)
  elif 'error' in response:
  raise Exception(f'Failed to map URL. Error: {response["error"]}')
  else:
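map_url on the async client now exposes explicit keyword-only parameters alongside MapParams, and the body returns a MapResponse model rather than the raw dict. A sketch using the names from the new signature; reading result.links assumes MapResponse carries the links field implied by the success check above:

    # Sketch of the new keyword-only map_url parameters shown above.
    import asyncio
    from firecrawl.firecrawl import AsyncFirecrawlApp

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")
        mapped = await app.map_url(
            "https://example.com",
            search="docs",
            include_subdomains=True,
            limit=100,
        )
        print(mapped.links)  # assumed MapResponse field

    asyncio.run(main())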
@@ -3431,27 +3455,28 @@ class AsyncFirecrawlApp(FirecrawlApp):

  async def extract(
  self,
- urls: List[str],
- params: Optional[ExtractParams] = None) -> ExtractResponse[Any]:
+ urls: Optional[List[str]] = None,
+ *,
+ prompt: Optional[str] = None,
+ schema: Optional[Any] = None,
+ system_prompt: Optional[str] = None,
+ allow_external_links: Optional[bool] = False,
+ enable_web_search: Optional[bool] = False,
+ show_sources: Optional[bool] = False,
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
+
  """
  Asynchronously extract structured information from URLs.

  Args:
- urls (List[str]): URLs to extract from
- params (Optional[ExtractParams]): See ExtractParams model:
- Extraction Config:
- * prompt - Custom extraction prompt
- * schema - JSON schema/Pydantic model
- * systemPrompt - System context
-
- Behavior Options:
- * allowExternalLinks - Follow external links
- * enableWebSearch - Enable web search
- * includeSubdomains - Include subdomains
- * showSources - Include source URLs
-
- Scraping Options:
- * scrapeOptions - Page scraping config
+ urls (Optional[List[str]]): URLs to extract from
+ prompt (Optional[str]): Custom extraction prompt
+ schema (Optional[Any]): JSON schema/Pydantic model
+ system_prompt (Optional[str]): System context
+ allow_external_links (Optional[bool]): Follow external links
+ enable_web_search (Optional[bool]): Enable web search
+ show_sources (Optional[bool]): Include source URLs
+ agent (Optional[Dict[str, Any]]): Agent configuration

  Returns:
  ExtractResponse with:
@@ -3464,29 +3489,35 @@ class AsyncFirecrawlApp(FirecrawlApp):
  """
  headers = self._prepare_headers()

- if not params or (not params.get('prompt') and not params.get('schema')):
+ if not prompt and not schema:
  raise ValueError("Either prompt or schema is required")

- schema = params.get('schema')
+ if not urls and not prompt:
+ raise ValueError("Either urls or prompt is required")
+
  if schema:
  if hasattr(schema, 'model_json_schema'):
+ # Convert Pydantic model to JSON schema
  schema = schema.model_json_schema()
+ # Otherwise assume it's already a JSON schema dict

  request_data = {
- 'urls': urls,
- 'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
- 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
- 'showSources': params.get('show_sources', params.get('showSources', False)),
+ 'urls': urls or [],
+ 'allowExternalLinks': allow_external_links,
+ 'enableWebSearch': enable_web_search,
+ 'showSources': show_sources,
  'schema': schema,
- 'origin': f'python-sdk@{version}'
+ 'origin': f'python-sdk@{get_version()}'
  }

- if params.get('prompt'):
- request_data['prompt'] = params['prompt']
- if params.get('system_prompt'):
- request_data['systemPrompt'] = params['system_prompt']
- elif params.get('systemPrompt'):
- request_data['systemPrompt'] = params['systemPrompt']
+ # Only add prompt and systemPrompt if they exist
+ if prompt:
+ request_data['prompt'] = prompt
+ if system_prompt:
+ request_data['systemPrompt'] = system_prompt
+
+ if agent:
+ request_data['agent'] = agent

  response = await self._async_post_request(
  f'{self.api_url}/v1/extract',
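AsyncFirecrawlApp.extract now takes explicit keyword arguments instead of an ExtractParams object, and a Pydantic model passed as schema is converted with model_json_schema(), as the body above shows. A sketch; reading result.data assumes the usual data field on ExtractResponse, whose full definition is not shown here:

    # Sketch of the reworked extract call.
    import asyncio
    import pydantic
    from firecrawl.firecrawl import AsyncFirecrawlApp

    class Article(pydantic.BaseModel):
        title: str
        author: str

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")
        result = await app.extract(
            urls=["https://example.com/blog/post"],
            prompt="Extract the article title and author",
            schema=Article,
        )
        print(result.data)  # assumed ExtractResponse field

    asyncio.run(main())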
@@ -3506,7 +3537,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  )

  if status_data['status'] == 'completed':
- return status_data
+ return ExtractResponse(**status_data)
  elif status_data['status'] in ['failed', 'cancelled']:
  raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')

@@ -3562,14 +3593,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
  status_data = next_data
  status_data['data'] = data

- response = {
- 'status': status_data.get('status'),
- 'total': status_data.get('total'),
- 'completed': status_data.get('completed'),
- 'creditsUsed': status_data.get('creditsUsed'),
- 'expiresAt': status_data.get('expiresAt'),
- 'data': status_data.get('data')
- }
+ response = BatchScrapeStatusResponse(
+ status=status_data.get('status'),
+ total=status_data.get('total'),
+ completed=status_data.get('completed'),
+ creditsUsed=status_data.get('creditsUsed'),
+ expiresAt=status_data.get('expiresAt'),
+ data=status_data.get('data')
+ )

  if 'error' in status_data:
  response['error'] = status_data['error']
@@ -3689,8 +3720,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_external_links: Optional[bool] = False,
  enable_web_search: Optional[bool] = False,
  show_sources: Optional[bool] = False,
- agent: Optional[Dict[str, Any]] = None,
- idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
  """
  Initiate an asynchronous extraction job without waiting for completion.

@@ -3714,7 +3744,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  Raises:
  ValueError: If job initiation fails
  """
- headers = self._prepare_headers(idempotency_key)
+ headers = self._prepare_headers()

  if not prompt and not schema:
  raise ValueError("Either prompt or schema is required")
@@ -3726,14 +3756,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
  if hasattr(schema, 'model_json_schema'):
  schema = schema.model_json_schema()

- request_data = {
- 'urls': urls or [],
- 'allowExternalLinks': allow_external_links,
- 'enableWebSearch': enable_web_search,
- 'showSources': show_sources,
- 'schema': schema,
- 'origin': f'python-sdk@{version}'
- }
+ request_data = ExtractResponse(
+ urls=urls or [],
+ allowExternalLinks=allow_external_links,
+ enableWebSearch=enable_web_search,
+ showSources=show_sources,
+ schema=schema,
+ origin=f'python-sdk@{version}'
+ )

  if prompt:
  request_data['prompt'] = prompt
@@ -3810,7 +3840,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

  await asyncio.sleep(2)

- return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'}
+ return GenerateLLMsTextStatusResponse(success=False, error='LLMs.txt generation job terminated unexpectedly')

  async def async_generate_llms_text(
  self,
@@ -3845,6 +3875,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
  if experimental_stream is not None:
  params['__experimental_stream'] = experimental_stream

+ params = GenerateLLMsTextParams(
+ maxUrls=max_urls,
+ showFullText=show_full_text,
+ __experimental_stream=experimental_stream
+ )
+
  headers = self._prepare_headers()
  json_data = {'url': url, **params.dict(exclude_none=True)}
  json_data['origin'] = f"python-sdk@{version}"
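The added block rebuilds GenerateLLMsTextParams from the max_urls, show_full_text and experimental_stream arguments before the request is posted. A hedged sketch of the corresponding call; the full method signature is not shown in this diff, so the keyword names are inferred from the body above:

    # Sketch only; keyword names inferred from the GenerateLLMsTextParams construction above.
    import asyncio
    from firecrawl.firecrawl import AsyncFirecrawlApp

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")
        job = await app.async_generate_llms_text(
            "https://example.com",
            max_urls=10,
            show_full_text=True,
        )
        print(job)

    asyncio.run(main())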
@@ -3981,7 +4017,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

  await asyncio.sleep(2)

- return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
+ return DeepResearchStatusResponse(success=False, error='Deep research job terminated unexpectedly')

  async def async_deep_research(
  self,
@@ -4088,7 +4124,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  country: Optional[str] = None,
  location: Optional[str] = None,
  timeout: Optional[int] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  params: Optional[Union[Dict[str, Any], SearchParams]] = None,
  **kwargs) -> SearchResponse:
  """
@@ -4103,7 +4139,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  country (Optional[str]): Country code (default: "us")
  location (Optional[str]): Geo-targeting
  timeout (Optional[int]): Request timeout in milliseconds
- scrape_options (Optional[CommonOptions]): Result scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Result scraping configuration
  params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
  **kwargs: Additional keyword arguments for future compatibility

firecrawl-2.1.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.0.2
+ Version: 2.1.1
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
firecrawl-2.1.1.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
- firecrawl/__init__.py,sha256=xqJCUbvvJi2ruXSmRS3Q9V7uZ0CpjK8gUFheUx0qXow,2555
- firecrawl/firecrawl.py,sha256=k_yRUlYssszubZuZZ0rMOyOJsRb7kDTxo5JZPEc8yJY,176255
+ firecrawl/__init__.py,sha256=NU9Qcom12t48ym3ovFMpCYI4-uH-Ac1jnddqSUzxEIE,2570
+ firecrawl/firecrawl.py,sha256=bXjJKt2UAdszpoCspBOPen_2lz5ysmVWP5vDMZUbyUo,177726
  firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
  firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
  tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
- firecrawl-2.0.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
- firecrawl-2.0.2.dist-info/METADATA,sha256=IVvPFtJZ9dMCNAKbI0dT9VpaC2RAZgE0iFUd_Gr_K6c,10583
- firecrawl-2.0.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- firecrawl-2.0.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
- firecrawl-2.0.2.dist-info/RECORD,,
+ firecrawl-2.1.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+ firecrawl-2.1.1.dist-info/METADATA,sha256=5_5qIPtR-xSv8jAkZLqBP1i-xefxucWl3rZo2OfPsLo,10583
+ firecrawl-2.1.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ firecrawl-2.1.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+ firecrawl-2.1.1.dist-info/RECORD,,