firecrawl 2.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.

This version of firecrawl might be problematic.

firecrawl/__init__.py CHANGED
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
  import logging
  import os

- from .firecrawl import FirecrawlApp, JsonConfig # noqa
+ from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa

- __version__ = "2.0.2"
+ __version__ = "2.1.0"

  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
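
As a quick orientation to the changes that follow: 2.1.0 renames CommonOptions to ScrapeOptions and re-exports it from the package root. A minimal, illustrative sketch (not part of the diff; the API key below is a placeholder):

```python
# Illustrative sketch for firecrawl 2.1.0; assumes a valid API key.
import firecrawl
from firecrawl import FirecrawlApp, ScrapeOptions  # ScrapeOptions replaces CommonOptions

print(firecrawl.__version__)  # "2.1.0"
app = FirecrawlApp(api_key="fc-YOUR-KEY")
options = ScrapeOptions(formats=["markdown"])  # model defined in firecrawl/firecrawl.py
```
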
firecrawl/firecrawl.py CHANGED
@@ -97,6 +97,16 @@ class ActionsResult(pydantic.BaseModel):
  """Result of actions performed during scraping."""
  screenshots: List[str]

+ class ChangeTrackingData(pydantic.BaseModel):
+ """
+ Data for the change tracking format.
+ """
+ previousScrapeAt: Optional[str] = None
+ changeStatus: str # "new" | "same" | "changed" | "removed"
+ visibility: str # "visible" | "hidden"
+ diff: Optional[Dict[str, Any]] = None
+ json: Optional[Any] = None
+
  class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
  """Document retrieved or processed by Firecrawl."""
  url: Optional[str] = None
@@ -111,6 +121,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]):
  actions: Optional[ActionsResult] = None
  title: Optional[str] = None # v1 search only
  description: Optional[str] = None # v1 search only
+ changeTracking: Optional[ChangeTrackingData] = None

  class LocationConfig(pydantic.BaseModel):
  """Location configuration for scraping."""
@@ -124,9 +135,9 @@ class WebhookConfig(pydantic.BaseModel):
  metadata: Optional[Dict[str, str]] = None
  events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None

- class CommonOptions(pydantic.BaseModel):
+ class ScrapeOptions(pydantic.BaseModel):
  """Parameters for scraping operations."""
- formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None
+ formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
  headers: Optional[Dict[str, str]] = None
  includeTags: Optional[List[str]] = None
  excludeTags: Optional[List[str]] = None
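
The ChangeTrackingData model moves up in the module so it is defined before FirecrawlDocument references it, FirecrawlDocument gains a changeTracking field, and "changeTracking" becomes a valid entry in formats. A hedged sketch of reading the new data, assuming scrape_url accepts the formats keyword shown later in this diff and returns a FirecrawlDocument-style object:

```python
# Illustrative only; field names come from ChangeTrackingData above.
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
doc = app.scrape_url("https://example.com", formats=["markdown", "changeTracking"])

tracking = doc.changeTracking  # Optional[ChangeTrackingData]
if tracking is not None:
    # changeStatus is one of "new" | "same" | "changed" | "removed"
    print(tracking.changeStatus, tracking.visibility, tracking.previousScrapeAt)
```
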
@@ -193,7 +204,7 @@ class JsonConfig(pydantic.BaseModel):
  systemPrompt: Optional[str] = None
  agent: Optional[ExtractAgent] = None

- class ScrapeParams(CommonOptions):
+ class ScrapeParams(ScrapeOptions):
  """Parameters for scraping operations."""
  extract: Optional[JsonConfig] = None
  jsonOptions: Optional[JsonConfig] = None
@@ -235,7 +246,7 @@ class CrawlParams(pydantic.BaseModel):
  allowBackwardLinks: Optional[bool] = None
  allowExternalLinks: Optional[bool] = None
  ignoreSitemap: Optional[bool] = None
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None
  webhook: Optional[Union[str, WebhookConfig]] = None
  deduplicateSimilarURLs: Optional[bool] = None
  ignoreQueryParameters: Optional[bool] = None
@@ -289,7 +300,7 @@ class ExtractParams(pydantic.BaseModel):
  includeSubdomains: Optional[bool] = None
  origin: Optional[str] = None
  showSources: Optional[bool] = None
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None

  class ExtractResponse(pydantic.BaseModel, Generic[T]):
  """Response from extract operations."""
@@ -309,7 +320,7 @@ class SearchParams(pydantic.BaseModel):
  location: Optional[str] = None
  origin: Optional[str] = "api"
  timeout: Optional[int] = 60000
- scrapeOptions: Optional[CommonOptions] = None
+ scrapeOptions: Optional[ScrapeOptions] = None

  class SearchResponse(pydantic.BaseModel):
  """Response from search operations."""
@@ -377,16 +388,6 @@ class GenerateLLMsTextStatusResponse(pydantic.BaseModel):
  status: Literal["processing", "completed", "failed"]
  error: Optional[str] = None
  expiresAt: str
-
- class ChangeTrackingData(pydantic.BaseModel):
- """
- Data for the change tracking format.
- """
- previousScrapeAt: Optional[str] = None
- changeStatus: str # "new" | "same" | "changed" | "removed"
- visibility: str # "visible" | "hidden"
- diff: Optional[Dict[str, Any]] = None
- json: Optional[Any] = None

  class SearchResponse(pydantic.BaseModel):
  """
@@ -442,7 +443,7 @@ class FirecrawlApp:
  self,
  url: str,
  *,
- formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None,
+ formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
  include_tags: Optional[List[str]] = None,
  exclude_tags: Optional[List[str]] = None,
  only_main_content: Optional[bool] = None,
@@ -568,7 +569,7 @@ class FirecrawlApp:
  country: Optional[str] = None,
  location: Optional[str] = None,
  timeout: Optional[int] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  params: Optional[Union[Dict[str, Any], SearchParams]] = None,
  **kwargs) -> SearchResponse:
  """
@@ -583,7 +584,7 @@ class FirecrawlApp:
  country (Optional[str]): Country code (default: "us")
  location (Optional[str]): Geo-targeting
  timeout (Optional[int]): Request timeout in milliseconds
- scrape_options (Optional[CommonOptions]): Result scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Result scraping configuration
  params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
  **kwargs: Additional keyword arguments for future compatibility

@@ -664,7 +665,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -686,7 +687,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
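
The remaining hunks in this class are mechanical: every parameter and docstring that referred to Optional[CommonOptions] now refers to Optional[ScrapeOptions]. A hedged sketch of passing per-page settings into a crawl, assuming the synchronous crawl entry point is crawl_url as in earlier releases:

```python
# Illustrative only; keyword names mirror the signatures in this diff.
from firecrawl import FirecrawlApp, ScrapeOptions

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
result = app.crawl_url(
    "https://example.com",
    ignore_sitemap=True,
    scrape_options=ScrapeOptions(formats=["markdown"], includeTags=["article"]),
)
```
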
@@ -768,7 +769,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -789,7 +790,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1007,7 +1008,7 @@ class FirecrawlApp:
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -1028,7 +1029,7 @@ class FirecrawlApp:
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -1741,7 +1742,7 @@ class FirecrawlApp:

  def async_extract(
  self,
- urls: List[str],
+ urls: Optional[List[str]] = None,
  *,
  prompt: Optional[str] = None,
  schema: Optional[Any] = None,
@@ -1749,8 +1750,7 @@ class FirecrawlApp:
  allow_external_links: Optional[bool] = False,
  enable_web_search: Optional[bool] = False,
  show_sources: Optional[bool] = False,
- agent: Optional[Dict[str, Any]] = None,
- idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
  """
  Initiate an asynchronous extract job.

@@ -1774,7 +1774,7 @@ class FirecrawlApp:
  Raises:
  ValueError: If job initiation fails
  """
- headers = self._prepare_headers(idempotency_key)
+ headers = self._prepare_headers()

  schema = schema
  if schema:
@@ -2922,9 +2922,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('success'):
  try:
- id = response.json().get('id')
+ id = response.get('id')
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  return self._monitor_job_status(id, headers, poll_interval)
@@ -3050,7 +3050,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('status_code') == 200:
  try:
  return BatchScrapeResponse(**response.json())
  except:
@@ -3059,7 +3059,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  self._handle_error(response, 'start batch scrape job')

  async def crawl_url(
- self,
+ self,
  url: str,
  *,
  include_paths: Optional[List[str]] = None,
@@ -3070,7 +3070,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
@@ -3092,7 +3092,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -3148,15 +3148,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
  params_dict = final_params.dict(exclude_none=True)
  params_dict['url'] = url
  params_dict['origin'] = f"python-sdk@{version}"
-
  # Make request
  headers = self._prepare_headers(idempotency_key)
  response = await self._async_post_request(
  f'{self.api_url}/v1/crawl', params_dict, headers)

- if response.status_code == 200:
+ print(response)
+ if response.get('success'):
  try:
- id = response.json().get('id')
+ id = response.get('id')
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  return self._monitor_job_status(id, headers, poll_interval)
@@ -3176,11 +3176,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links: Optional[bool] = None,
  allow_external_links: Optional[bool] = None,
  ignore_sitemap: Optional[bool] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  webhook: Optional[Union[str, WebhookConfig]] = None,
  deduplicate_similar_urls: Optional[bool] = None,
  ignore_query_parameters: Optional[bool] = None,
  regex_on_full_url: Optional[bool] = None,
+ poll_interval: Optional[int] = 2,
  idempotency_key: Optional[str] = None,
  **kwargs
  ) -> CrawlResponse:
@@ -3197,7 +3198,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_backward_links (Optional[bool]): Follow parent directory links
  allow_external_links (Optional[bool]): Follow external domain links
  ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
- scrape_options (Optional[CommonOptions]): Page scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Page scraping configuration
  webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
  deduplicate_similar_urls (Optional[bool]): Remove similar URLs
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
@@ -3262,9 +3263,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if response.status_code == 200:
+ if response.get('success'):
  try:
- return CrawlResponse(**response.json())
+ return CrawlResponse(**response)
  except:
  raise Exception(f'Failed to parse Firecrawl response as JSON.')
  else:
@@ -3303,7 +3304,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if status_data['status'] == 'completed':
+ if status_data.get('status') == 'completed':
  if 'data' in status_data:
  data = status_data['data']
  while 'next' in status_data:
@@ -3317,26 +3318,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
  data.extend(next_data.get('data', []))
  status_data = next_data
  status_data['data'] = data
-
- response = {
- 'status': status_data.get('status'),
- 'total': status_data.get('total'),
- 'completed': status_data.get('completed'),
- 'creditsUsed': status_data.get('creditsUsed'),
- 'expiresAt': status_data.get('expiresAt'),
- 'data': status_data.get('data')
- }
+ # Create CrawlStatusResponse object from status data
+ response = CrawlStatusResponse(
+ status=status_data.get('status'),
+ total=status_data.get('total'),
+ completed=status_data.get('completed'),
+ creditsUsed=status_data.get('creditsUsed'),
+ expiresAt=status_data.get('expiresAt'),
+ data=status_data.get('data'),
+ success=False if 'error' in status_data else True
+ )

  if 'error' in status_data:
- response['error'] = status_data['error']
+ response.error = status_data.get('error')

  if 'next' in status_data:
- response['next'] = status_data['next']
+ response.next = status_data.get('next')

- return {
- 'success': False if 'error' in status_data else True,
- **response
- }
+ return response

  async def _async_monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int = 2) -> CrawlStatusResponse:
  """
@@ -3359,7 +3358,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  headers
  )

- if status_data['status'] == 'completed':
+ if status_data.get('status') == 'completed':
  if 'data' in status_data:
  data = status_data['data']
  while 'next' in status_data:
@@ -3376,15 +3375,22 @@ class AsyncFirecrawlApp(FirecrawlApp):
  return status_data
  else:
  raise Exception('Job completed but no data was returned')
- elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
+ elif status_data.get('status') in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']:
  await asyncio.sleep(max(poll_interval, 2))
  else:
  raise Exception(f'Job failed or was stopped. Status: {status_data["status"]}')

  async def map_url(
- self,
- url: str,
- params: Optional[MapParams] = None) -> MapResponse:
+ self,
+ url: str,
+ *,
+ search: Optional[str] = None,
+ ignore_sitemap: Optional[bool] = None,
+ include_subdomains: Optional[bool] = None,
+ sitemap_only: Optional[bool] = None,
+ limit: Optional[int] = None,
+ timeout: Optional[int] = None,
+ params: Optional[MapParams] = None) -> MapResponse:
  """
  Asynchronously map and discover links from a URL.

@@ -3409,21 +3415,40 @@ class AsyncFirecrawlApp(FirecrawlApp):
  Raises:
  Exception: If mapping fails
  """
- headers = self._prepare_headers()
- json_data = {'url': url}
+ map_params = {}
  if params:
- json_data.update(params)
- json_data['origin'] = f"python-sdk@{version}"
+ map_params.update(params.dict(exclude_none=True))

+ # Add individual parameters
+ if search is not None:
+ map_params['search'] = search
+ if ignore_sitemap is not None:
+ map_params['ignoreSitemap'] = ignore_sitemap
+ if include_subdomains is not None:
+ map_params['includeSubdomains'] = include_subdomains
+ if sitemap_only is not None:
+ map_params['sitemapOnly'] = sitemap_only
+ if limit is not None:
+ map_params['limit'] = limit
+ if timeout is not None:
+ map_params['timeout'] = timeout
+
+ # Create final params object
+ final_params = MapParams(**map_params)
+ params_dict = final_params.dict(exclude_none=True)
+ params_dict['url'] = url
+ params_dict['origin'] = f"python-sdk@{version}"
+
+ # Make request
  endpoint = f'/v1/map'
  response = await self._async_post_request(
  f'{self.api_url}{endpoint}',
- json_data,
- headers
+ params_dict,
+ headers={"Authorization": f"Bearer {self.api_key}"}
  )

  if response.get('success') and 'links' in response:
- return response
+ return MapResponse(**response)
  elif 'error' in response:
  raise Exception(f'Failed to map URL. Error: {response["error"]}')
  else:
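
AsyncFirecrawlApp.map_url now takes individual keyword arguments (search, ignore_sitemap, include_subdomains, sitemap_only, limit, timeout) alongside the old MapParams object, builds the request from a validated MapParams, and returns a MapResponse model rather than a raw dict. A hedged usage sketch:

```python
# Illustrative only; argument names are taken from the new map_url signature above.
import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp

async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    result = await app.map_url("https://example.com", search="docs", limit=50)
    # 'links' is assumed per the success check above; MapResponse is a pydantic model
    print(result.links[:5])

asyncio.run(main())
```
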
@@ -3431,27 +3456,28 @@ class AsyncFirecrawlApp(FirecrawlApp):

  async def extract(
  self,
- urls: List[str],
- params: Optional[ExtractParams] = None) -> ExtractResponse[Any]:
+ urls: Optional[List[str]] = None,
+ *,
+ prompt: Optional[str] = None,
+ schema: Optional[Any] = None,
+ system_prompt: Optional[str] = None,
+ allow_external_links: Optional[bool] = False,
+ enable_web_search: Optional[bool] = False,
+ show_sources: Optional[bool] = False,
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
+
  """
  Asynchronously extract structured information from URLs.

  Args:
- urls (List[str]): URLs to extract from
- params (Optional[ExtractParams]): See ExtractParams model:
- Extraction Config:
- * prompt - Custom extraction prompt
- * schema - JSON schema/Pydantic model
- * systemPrompt - System context
-
- Behavior Options:
- * allowExternalLinks - Follow external links
- * enableWebSearch - Enable web search
- * includeSubdomains - Include subdomains
- * showSources - Include source URLs
-
- Scraping Options:
- * scrapeOptions - Page scraping config
+ urls (Optional[List[str]]): URLs to extract from
+ prompt (Optional[str]): Custom extraction prompt
+ schema (Optional[Any]): JSON schema/Pydantic model
+ system_prompt (Optional[str]): System context
+ allow_external_links (Optional[bool]): Follow external links
+ enable_web_search (Optional[bool]): Enable web search
+ show_sources (Optional[bool]): Include source URLs
+ agent (Optional[Dict[str, Any]]): Agent configuration

  Returns:
  ExtractResponse with:
@@ -3464,29 +3490,35 @@ class AsyncFirecrawlApp(FirecrawlApp):
  """
  headers = self._prepare_headers()

- if not params or (not params.get('prompt') and not params.get('schema')):
+ if not prompt and not schema:
  raise ValueError("Either prompt or schema is required")

- schema = params.get('schema')
+ if not urls and not prompt:
+ raise ValueError("Either urls or prompt is required")
+
  if schema:
  if hasattr(schema, 'model_json_schema'):
+ # Convert Pydantic model to JSON schema
  schema = schema.model_json_schema()
+ # Otherwise assume it's already a JSON schema dict

  request_data = {
- 'urls': urls,
- 'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
- 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
- 'showSources': params.get('show_sources', params.get('showSources', False)),
+ 'urls': urls or [],
+ 'allowExternalLinks': allow_external_links,
+ 'enableWebSearch': enable_web_search,
+ 'showSources': show_sources,
  'schema': schema,
- 'origin': f'python-sdk@{version}'
+ 'origin': f'python-sdk@{get_version()}'
  }

- if params.get('prompt'):
- request_data['prompt'] = params['prompt']
- if params.get('system_prompt'):
- request_data['systemPrompt'] = params['system_prompt']
- elif params.get('systemPrompt'):
- request_data['systemPrompt'] = params['systemPrompt']
+ # Only add prompt and systemPrompt if they exist
+ if prompt:
+ request_data['prompt'] = prompt
+ if system_prompt:
+ request_data['systemPrompt'] = system_prompt
+
+ if agent:
+ request_data['agent'] = agent

  response = await self._async_post_request(
  f'{self.api_url}/v1/extract',
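
AsyncFirecrawlApp.extract likewise drops the ExtractParams object in favour of keyword-only arguments, and urls becomes optional when a prompt is given. A hedged sketch, assuming a Pydantic model is passed as the schema (the method converts it with model_json_schema, as shown above):

```python
# Illustrative only; keyword names follow the new extract signature above.
import asyncio
import pydantic
from firecrawl.firecrawl import AsyncFirecrawlApp

class Article(pydantic.BaseModel):
    title: str
    author: str

async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    result = await app.extract(
        urls=["https://example.com/blog"],
        prompt="Extract the article title and author",
        schema=Article,  # converted via model_json_schema() internally
        show_sources=True,
    )
    print(result)  # ExtractResponse[Any]

asyncio.run(main())
```
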
@@ -3506,7 +3538,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  )

  if status_data['status'] == 'completed':
- return status_data
+ return ExtractResponse(**status_data)
  elif status_data['status'] in ['failed', 'cancelled']:
  raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')

@@ -3562,14 +3594,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
  status_data = next_data
  status_data['data'] = data

- response = {
- 'status': status_data.get('status'),
- 'total': status_data.get('total'),
- 'completed': status_data.get('completed'),
- 'creditsUsed': status_data.get('creditsUsed'),
- 'expiresAt': status_data.get('expiresAt'),
- 'data': status_data.get('data')
- }
+ response = BatchScrapeStatusResponse(
+ status=status_data.get('status'),
+ total=status_data.get('total'),
+ completed=status_data.get('completed'),
+ creditsUsed=status_data.get('creditsUsed'),
+ expiresAt=status_data.get('expiresAt'),
+ data=status_data.get('data')
+ )

  if 'error' in status_data:
  response['error'] = status_data['error']
@@ -3689,8 +3721,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  allow_external_links: Optional[bool] = False,
  enable_web_search: Optional[bool] = False,
  show_sources: Optional[bool] = False,
- agent: Optional[Dict[str, Any]] = None,
- idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+ agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
  """
  Initiate an asynchronous extraction job without waiting for completion.

@@ -3714,7 +3745,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  Raises:
  ValueError: If job initiation fails
  """
- headers = self._prepare_headers(idempotency_key)
+ headers = self._prepare_headers()

  if not prompt and not schema:
  raise ValueError("Either prompt or schema is required")
@@ -3726,14 +3757,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
  if hasattr(schema, 'model_json_schema'):
  schema = schema.model_json_schema()

- request_data = {
- 'urls': urls or [],
- 'allowExternalLinks': allow_external_links,
- 'enableWebSearch': enable_web_search,
- 'showSources': show_sources,
- 'schema': schema,
- 'origin': f'python-sdk@{version}'
- }
+ request_data = ExtractResponse(
+ urls=urls or [],
+ allowExternalLinks=allow_external_links,
+ enableWebSearch=enable_web_search,
+ showSources=show_sources,
+ schema=schema,
+ origin=f'python-sdk@{version}'
+ )

  if prompt:
  request_data['prompt'] = prompt
@@ -3810,7 +3841,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

  await asyncio.sleep(2)

- return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'}
+ return GenerateLLMsTextStatusResponse(success=False, error='LLMs.txt generation job terminated unexpectedly')

  async def async_generate_llms_text(
  self,
@@ -3845,6 +3876,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
  if experimental_stream is not None:
  params['__experimental_stream'] = experimental_stream

+ params = GenerateLLMsTextParams(
+ maxUrls=max_urls,
+ showFullText=show_full_text,
+ __experimental_stream=experimental_stream
+ )
+
  headers = self._prepare_headers()
  json_data = {'url': url, **params.dict(exclude_none=True)}
  json_data['origin'] = f"python-sdk@{version}"
@@ -3981,7 +4018,7 @@ class AsyncFirecrawlApp(FirecrawlApp):

  await asyncio.sleep(2)

- return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
+ return DeepResearchStatusResponse(success=False, error='Deep research job terminated unexpectedly')

  async def async_deep_research(
  self,
@@ -4088,7 +4125,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  country: Optional[str] = None,
  location: Optional[str] = None,
  timeout: Optional[int] = None,
- scrape_options: Optional[CommonOptions] = None,
+ scrape_options: Optional[ScrapeOptions] = None,
  params: Optional[Union[Dict[str, Any], SearchParams]] = None,
  **kwargs) -> SearchResponse:
  """
@@ -4103,7 +4140,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
  country (Optional[str]): Country code (default: "us")
  location (Optional[str]): Geo-targeting
  timeout (Optional[int]): Request timeout in milliseconds
- scrape_options (Optional[CommonOptions]): Result scraping configuration
+ scrape_options (Optional[ScrapeOptions]): Result scraping configuration
  params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
  **kwargs: Additional keyword arguments for future compatibility

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.0.2
+ Version: 2.1.0
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
@@ -1,12 +1,12 @@
- firecrawl/__init__.py,sha256=xqJCUbvvJi2ruXSmRS3Q9V7uZ0CpjK8gUFheUx0qXow,2555
- firecrawl/firecrawl.py,sha256=k_yRUlYssszubZuZZ0rMOyOJsRb7kDTxo5JZPEc8yJY,176255
+ firecrawl/__init__.py,sha256=iHizMdAIoTmkymj1pSBrh7ktCGYU3kZ1kXZgntQPm3g,2570
+ firecrawl/firecrawl.py,sha256=O-wyUWL9VnfRhZWgVAnmwpwIe0M3MPz9ek95KfYcHPQ,177750
  firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
  firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
  tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
- firecrawl-2.0.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
- firecrawl-2.0.2.dist-info/METADATA,sha256=IVvPFtJZ9dMCNAKbI0dT9VpaC2RAZgE0iFUd_Gr_K6c,10583
- firecrawl-2.0.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- firecrawl-2.0.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
- firecrawl-2.0.2.dist-info/RECORD,,
+ firecrawl-2.1.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+ firecrawl-2.1.0.dist-info/METADATA,sha256=l-XNBUPSE1sFvGZ1wBvesKC7fRlEIGI0DTfY7BNPAWI,10583
+ firecrawl-2.1.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ firecrawl-2.1.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+ firecrawl-2.1.0.dist-info/RECORD,,