firecrawl 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. (The original page linked to further details here; the link target was not preserved in this text copy.)

firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.5.0"
16
+ __version__ = "1.6.1"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py CHANGED
@@ -12,15 +12,33 @@ Classes:
12
12
  import logging
13
13
  import os
14
14
  import time
15
- from typing import Any, Dict, Optional, List
15
+ from typing import Any, Dict, Optional, List, Union
16
16
  import json
17
17
 
18
18
  import requests
19
+ import pydantic
19
20
  import websockets
20
21
 
21
22
  logger : logging.Logger = logging.getLogger("firecrawl")
22
23
 
23
24
  class FirecrawlApp:
25
+ class ExtractParams(pydantic.BaseModel):
26
+ """
27
+ Parameters for the extract operation.
28
+ """
29
+ prompt: str
30
+ schema: Optional[Any] = None
31
+ system_prompt: Optional[str] = None
32
+ allow_external_links: Optional[bool] = False
33
+
34
+ class ExtractResponse(pydantic.BaseModel):
35
+ """
36
+ Response from the extract operation.
37
+ """
38
+ success: bool
39
+ data: Optional[Any] = None
40
+ error: Optional[str] = None
41
+
24
42
  def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
25
43
  """
26
44
  Initialize the FirecrawlApp instance with API key, API URL.
@@ -434,6 +452,52 @@ class FirecrawlApp:
434
452
  else:
435
453
  self._handle_error(response, 'check batch scrape status')
436
454
 
455
+
456
+ def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
457
+ """
458
+ Extracts information from a URL using the Firecrawl API.
459
+
460
+ Args:
461
+ urls (List[str]): The URLs to extract information from.
462
+ params (Optional[ExtractParams]): Additional parameters for the extract request.
463
+
464
+ Returns:
465
+ Union[ExtractResponse, ErrorResponse]: The response from the extract operation.
466
+ """
467
+ headers = self._prepare_headers()
468
+
469
+ if not params or not params.get('prompt'):
470
+ raise ValueError("Prompt is required")
471
+
472
+ if not params.get('schema'):
473
+ raise ValueError("Schema is required for extraction")
474
+
475
+ jsonData = {'urls': urls, **params}
476
+ jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None
477
+
478
+ try:
479
+ response = self._post_request(
480
+ f'{self.api_url}/v1/extract',
481
+ {
482
+ **jsonData,
483
+ 'allowExternalLinks': params.get('allow_external_links', False),
484
+ 'schema': jsonSchema
485
+ },
486
+ headers
487
+ )
488
+ if response.status_code == 200:
489
+ data = response.json()
490
+ if data['success']:
491
+ return data
492
+ else:
493
+ raise Exception(f'Failed to extract. Error: {data["error"]}')
494
+ else:
495
+ self._handle_error(response, "extract")
496
+ except Exception as e:
497
+ raise ValueError(str(e), 500)
498
+
499
+ return {'success': False, 'error': "Internal server error."}
500
+
437
501
  def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
438
502
  """
439
503
  Prepare the headers for API requests.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.5.0
3
+ Version: 1.6.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -0,0 +1,11 @@
1
+ firecrawl/__init__.py,sha256=TYVUAqBdGLanvONLqDTalnRvVgT6bsdusGx-tN-KCEM,2543
2
+ firecrawl/firecrawl.py,sha256=szpOBWjFhNGtQ65xda__VWTRiF_77kg37NRz-XU_rp8,30210
3
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
5
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
7
+ firecrawl-1.6.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
8
+ firecrawl-1.6.1.dist-info/METADATA,sha256=2kVO2LrhCtoxoQ9K9oTdX_AWf9ForvPtyAZ7iguxghg,10596
9
+ firecrawl-1.6.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
10
+ firecrawl-1.6.1.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
11
+ firecrawl-1.6.1.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- firecrawl/__init__.py,sha256=6UEPRP09ZHMJ99KG4N8qiTyWW7RNaGzY18bYkrXRAqw,2543
2
- firecrawl/firecrawl.py,sha256=09QENx-ME8455WiRpKV53-2cFh7T4MX0fX5vRP7It0M,28045
3
- firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
5
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
7
- firecrawl-1.5.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
8
- firecrawl-1.5.0.dist-info/METADATA,sha256=Wwo7Do4R_42Fbkq0jhOMxhgCTC4iZxRJ1aILD2ijFzs,10596
9
- firecrawl-1.5.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
10
- firecrawl-1.5.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
11
- firecrawl-1.5.0.dist-info/RECORD,,