firecrawl 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +65 -1
- {firecrawl-1.5.0.dist-info → firecrawl-1.6.1.dist-info}/METADATA +1 -1
- firecrawl-1.6.1.dist-info/RECORD +11 -0
- firecrawl-1.5.0.dist-info/RECORD +0 -11
- {firecrawl-1.5.0.dist-info → firecrawl-1.6.1.dist-info}/LICENSE +0 -0
- {firecrawl-1.5.0.dist-info → firecrawl-1.6.1.dist-info}/WHEEL +0 -0
- {firecrawl-1.5.0.dist-info → firecrawl-1.6.1.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
firecrawl/firecrawl.py
CHANGED
|
@@ -12,15 +12,33 @@ Classes:
|
|
|
12
12
|
import logging
|
|
13
13
|
import os
|
|
14
14
|
import time
|
|
15
|
-
from typing import Any, Dict, Optional, List
|
|
15
|
+
from typing import Any, Dict, Optional, List, Union
|
|
16
16
|
import json
|
|
17
17
|
|
|
18
18
|
import requests
|
|
19
|
+
import pydantic
|
|
19
20
|
import websockets
|
|
20
21
|
|
|
21
22
|
logger : logging.Logger = logging.getLogger("firecrawl")
|
|
22
23
|
|
|
23
24
|
class FirecrawlApp:
|
|
25
|
+
class ExtractParams(pydantic.BaseModel):
|
|
26
|
+
"""
|
|
27
|
+
Parameters for the extract operation.
|
|
28
|
+
"""
|
|
29
|
+
prompt: str
|
|
30
|
+
schema: Optional[Any] = None
|
|
31
|
+
system_prompt: Optional[str] = None
|
|
32
|
+
allow_external_links: Optional[bool] = False
|
|
33
|
+
|
|
34
|
+
class ExtractResponse(pydantic.BaseModel):
|
|
35
|
+
"""
|
|
36
|
+
Response from the extract operation.
|
|
37
|
+
"""
|
|
38
|
+
success: bool
|
|
39
|
+
data: Optional[Any] = None
|
|
40
|
+
error: Optional[str] = None
|
|
41
|
+
|
|
24
42
|
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
|
|
25
43
|
"""
|
|
26
44
|
Initialize the FirecrawlApp instance with API key, API URL.
|
|
@@ -434,6 +452,52 @@ class FirecrawlApp:
|
|
|
434
452
|
else:
|
|
435
453
|
self._handle_error(response, 'check batch scrape status')
|
|
436
454
|
|
|
455
|
+
|
|
456
|
+
def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
|
|
457
|
+
"""
|
|
458
|
+
Extracts information from a URL using the Firecrawl API.
|
|
459
|
+
|
|
460
|
+
Args:
|
|
461
|
+
urls (List[str]): The URLs to extract information from.
|
|
462
|
+
params (Optional[ExtractParams]): Additional parameters for the extract request.
|
|
463
|
+
|
|
464
|
+
Returns:
|
|
465
|
+
Union[ExtractResponse, ErrorResponse]: The response from the extract operation.
|
|
466
|
+
"""
|
|
467
|
+
headers = self._prepare_headers()
|
|
468
|
+
|
|
469
|
+
if not params or not params.get('prompt'):
|
|
470
|
+
raise ValueError("Prompt is required")
|
|
471
|
+
|
|
472
|
+
if not params.get('schema'):
|
|
473
|
+
raise ValueError("Schema is required for extraction")
|
|
474
|
+
|
|
475
|
+
jsonData = {'urls': urls, **params}
|
|
476
|
+
jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None
|
|
477
|
+
|
|
478
|
+
try:
|
|
479
|
+
response = self._post_request(
|
|
480
|
+
f'{self.api_url}/v1/extract',
|
|
481
|
+
{
|
|
482
|
+
**jsonData,
|
|
483
|
+
'allowExternalLinks': params.get('allow_external_links', False),
|
|
484
|
+
'schema': jsonSchema
|
|
485
|
+
},
|
|
486
|
+
headers
|
|
487
|
+
)
|
|
488
|
+
if response.status_code == 200:
|
|
489
|
+
data = response.json()
|
|
490
|
+
if data['success']:
|
|
491
|
+
return data
|
|
492
|
+
else:
|
|
493
|
+
raise Exception(f'Failed to extract. Error: {data["error"]}')
|
|
494
|
+
else:
|
|
495
|
+
self._handle_error(response, "extract")
|
|
496
|
+
except Exception as e:
|
|
497
|
+
raise ValueError(str(e), 500)
|
|
498
|
+
|
|
499
|
+
return {'success': False, 'error': "Internal server error."}
|
|
500
|
+
|
|
437
501
|
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
|
|
438
502
|
"""
|
|
439
503
|
Prepare the headers for API requests.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
firecrawl/__init__.py,sha256=TYVUAqBdGLanvONLqDTalnRvVgT6bsdusGx-tN-KCEM,2543
|
|
2
|
+
firecrawl/firecrawl.py,sha256=szpOBWjFhNGtQ65xda__VWTRiF_77kg37NRz-XU_rp8,30210
|
|
3
|
+
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
+
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
+
firecrawl-1.6.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
+
firecrawl-1.6.1.dist-info/METADATA,sha256=2kVO2LrhCtoxoQ9K9oTdX_AWf9ForvPtyAZ7iguxghg,10596
|
|
9
|
+
firecrawl-1.6.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
+
firecrawl-1.6.1.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
+
firecrawl-1.6.1.dist-info/RECORD,,
|
firecrawl-1.5.0.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=6UEPRP09ZHMJ99KG4N8qiTyWW7RNaGzY18bYkrXRAqw,2543
|
|
2
|
-
firecrawl/firecrawl.py,sha256=09QENx-ME8455WiRpKV53-2cFh7T4MX0fX5vRP7It0M,28045
|
|
3
|
-
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
firecrawl/__tests__/e2e_withAuth/test.py,sha256=L-umFR3WyrJso1EwqkxjbTMr5AEI4t5zDfhQcCzitOI,7911
|
|
5
|
-
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=KQMmGAtJAIafja6EGtJ-W9162w2Hm6PNjqKl3_RQXLA,16456
|
|
7
|
-
firecrawl-1.5.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
-
firecrawl-1.5.0.dist-info/METADATA,sha256=Wwo7Do4R_42Fbkq0jhOMxhgCTC4iZxRJ1aILD2ijFzs,10596
|
|
9
|
-
firecrawl-1.5.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
-
firecrawl-1.5.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
-
firecrawl-1.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|