firecrawl-1.7.0-py3-none-any.whl → firecrawl-1.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/__tests__/v1/e2e_withAuth/test.py +66 -0
- firecrawl/firecrawl.py +57 -18
- {firecrawl-1.7.0.dist-info → firecrawl-1.8.0.dist-info}/METADATA +1 -1
- firecrawl-1.8.0.dist-info/RECORD +11 -0
- firecrawl-1.7.0.dist-info/RECORD +0 -11
- {firecrawl-1.7.0.dist-info → firecrawl-1.8.0.dist-info}/LICENSE +0 -0
- {firecrawl-1.7.0.dist-info → firecrawl-1.8.0.dist-info}/WHEEL +0 -0
- {firecrawl-1.7.0.dist-info → firecrawl-1.8.0.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
(diff hunk not shown in this view)
firecrawl/__tests__/v1/e2e_withAuth/test.py
CHANGED
|
@@ -371,4 +371,70 @@ def test_search_e2e():
|
|
|
371
371
|
# assert isinstance(llm_extraction['supports_sso'], bool)
|
|
372
372
|
# assert isinstance(llm_extraction['is_open_source'], bool)
|
|
373
373
|
|
|
374
|
+
def test_search_with_string_query():
|
|
375
|
+
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
376
|
+
response = app.search("firecrawl")
|
|
377
|
+
assert response["success"] is True
|
|
378
|
+
assert len(response["data"]) > 0
|
|
379
|
+
assert response["data"][0]["markdown"] is not None
|
|
380
|
+
assert response["data"][0]["metadata"] is not None
|
|
381
|
+
assert response["data"][0]["metadata"]["title"] is not None
|
|
382
|
+
assert response["data"][0]["metadata"]["description"] is not None
|
|
383
|
+
|
|
384
|
+
def test_search_with_params_dict():
|
|
385
|
+
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
386
|
+
response = app.search("firecrawl", {
|
|
387
|
+
"limit": 3,
|
|
388
|
+
"lang": "en",
|
|
389
|
+
"country": "us",
|
|
390
|
+
"scrapeOptions": {
|
|
391
|
+
"formats": ["markdown", "html", "links"],
|
|
392
|
+
"onlyMainContent": True
|
|
393
|
+
}
|
|
394
|
+
})
|
|
395
|
+
assert response["success"] is True
|
|
396
|
+
assert len(response["data"]) <= 3
|
|
397
|
+
for doc in response["data"]:
|
|
398
|
+
assert doc["markdown"] is not None
|
|
399
|
+
assert doc["html"] is not None
|
|
400
|
+
assert doc["links"] is not None
|
|
401
|
+
assert doc["metadata"] is not None
|
|
402
|
+
assert doc["metadata"]["title"] is not None
|
|
403
|
+
assert doc["metadata"]["description"] is not None
|
|
404
|
+
|
|
405
|
+
def test_search_with_params_object():
|
|
406
|
+
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
407
|
+
params = SearchParams(
|
|
408
|
+
query="firecrawl",
|
|
409
|
+
limit=3,
|
|
410
|
+
lang="en",
|
|
411
|
+
country="us",
|
|
412
|
+
scrapeOptions={
|
|
413
|
+
"formats": ["markdown", "html", "links"],
|
|
414
|
+
"onlyMainContent": True
|
|
415
|
+
}
|
|
416
|
+
)
|
|
417
|
+
response = app.search(params.query, params)
|
|
418
|
+
assert response["success"] is True
|
|
419
|
+
assert len(response["data"]) <= 3
|
|
420
|
+
for doc in response["data"]:
|
|
421
|
+
assert doc["markdown"] is not None
|
|
422
|
+
assert doc["html"] is not None
|
|
423
|
+
assert doc["links"] is not None
|
|
424
|
+
assert doc["metadata"] is not None
|
|
425
|
+
assert doc["metadata"]["title"] is not None
|
|
426
|
+
assert doc["metadata"]["description"] is not None
|
|
427
|
+
|
|
428
|
+
def test_search_invalid_api_key():
|
|
429
|
+
app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
|
430
|
+
with pytest.raises(Exception) as e:
|
|
431
|
+
app.search("test query")
|
|
432
|
+
assert "404" in str(e.value)
|
|
433
|
+
|
|
434
|
+
def test_search_with_invalid_params():
|
|
435
|
+
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
|
436
|
+
with pytest.raises(Exception) as e:
|
|
437
|
+
app.search("test query", {"invalid_param": "value"})
|
|
438
|
+
assert "ValidationError" in str(e.value)
|
|
439
|
+
|
|
374
440
|
|
firecrawl/firecrawl.py
CHANGED
|
@@ -21,7 +21,28 @@ import websockets
|
|
|
21
21
|
|
|
22
22
|
logger : logging.Logger = logging.getLogger("firecrawl")
|
|
23
23
|
|
|
24
|
+
class SearchParams(pydantic.BaseModel):
|
|
25
|
+
query: str
|
|
26
|
+
limit: Optional[int] = 5
|
|
27
|
+
tbs: Optional[str] = None
|
|
28
|
+
filter: Optional[str] = None
|
|
29
|
+
lang: Optional[str] = "en"
|
|
30
|
+
country: Optional[str] = "us"
|
|
31
|
+
location: Optional[str] = None
|
|
32
|
+
origin: Optional[str] = "api"
|
|
33
|
+
timeout: Optional[int] = 60000
|
|
34
|
+
scrapeOptions: Optional[Dict[str, Any]] = None
|
|
35
|
+
|
|
24
36
|
class FirecrawlApp:
|
|
37
|
+
class SearchResponse(pydantic.BaseModel):
|
|
38
|
+
"""
|
|
39
|
+
Response from the search operation.
|
|
40
|
+
"""
|
|
41
|
+
success: bool
|
|
42
|
+
data: List[Dict[str, Any]]
|
|
43
|
+
warning: Optional[str] = None
|
|
44
|
+
error: Optional[str] = None
|
|
45
|
+
|
|
25
46
|
class ExtractParams(pydantic.BaseModel):
|
|
26
47
|
"""
|
|
27
48
|
Parameters for the extract operation.
|
|
@@ -109,22 +130,36 @@ class FirecrawlApp:
|
|
|
109
130
|
else:
|
|
110
131
|
self._handle_error(response, 'scrape URL')
|
|
111
132
|
|
|
112
|
-
def search(self, query: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
|
133
|
+
def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> Dict[str, Any]:
|
|
113
134
|
"""
|
|
114
|
-
|
|
135
|
+
Search for content using the Firecrawl API.
|
|
115
136
|
|
|
116
137
|
Args:
|
|
117
|
-
query (str): The search query.
|
|
118
|
-
params (Optional[Dict[str, Any]]): Additional
|
|
138
|
+
query (str): The search query string.
|
|
139
|
+
params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters.
|
|
119
140
|
|
|
120
141
|
Returns:
|
|
121
|
-
Any: The search
|
|
122
|
-
|
|
123
|
-
Raises:
|
|
124
|
-
NotImplementedError: If the search request is attempted on API version v1.
|
|
125
|
-
Exception: If the search request fails.
|
|
142
|
+
Dict[str, Any]: The search response containing success status and search results.
|
|
126
143
|
"""
|
|
127
|
-
|
|
144
|
+
if params is None:
|
|
145
|
+
params = {}
|
|
146
|
+
|
|
147
|
+
if isinstance(params, dict):
|
|
148
|
+
search_params = SearchParams(query=query, **params)
|
|
149
|
+
else:
|
|
150
|
+
search_params = params
|
|
151
|
+
search_params.query = query
|
|
152
|
+
|
|
153
|
+
response = requests.post(
|
|
154
|
+
f"{self.api_url}/v1/search",
|
|
155
|
+
headers={"Authorization": f"Bearer {self.api_key}"},
|
|
156
|
+
json=search_params.dict(exclude_none=True)
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
if response.status_code != 200:
|
|
160
|
+
raise Exception(f"Request failed with status code {response.status_code}")
|
|
161
|
+
|
|
162
|
+
return response.json()
|
|
128
163
|
|
|
129
164
|
def crawl_url(self, url: str,
|
|
130
165
|
params: Optional[Dict[str, Any]] = None,
|
|
@@ -472,20 +507,24 @@ class FirecrawlApp:
|
|
|
472
507
|
if not params or not params.get('prompt'):
|
|
473
508
|
raise ValueError("Prompt is required")
|
|
474
509
|
|
|
475
|
-
|
|
476
|
-
|
|
510
|
+
schema = params.get('schema')
|
|
511
|
+
if schema:
|
|
512
|
+
if hasattr(schema, 'model_json_schema'):
|
|
513
|
+
# Convert Pydantic model to JSON schema
|
|
514
|
+
schema = schema.model_json_schema()
|
|
515
|
+
# Otherwise assume it's already a JSON schema dict
|
|
477
516
|
|
|
478
517
|
jsonData = {'urls': urls, **params}
|
|
479
|
-
|
|
518
|
+
request_data = {
|
|
519
|
+
**jsonData,
|
|
520
|
+
'allowExternalLinks': params.get('allow_external_links', False),
|
|
521
|
+
'schema': schema
|
|
522
|
+
}
|
|
480
523
|
|
|
481
524
|
try:
|
|
482
525
|
response = self._post_request(
|
|
483
526
|
f'{self.api_url}/v1/extract',
|
|
484
|
-
|
|
485
|
-
**jsonData,
|
|
486
|
-
'allowExternalLinks': params.get('allow_external_links', False),
|
|
487
|
-
'schema': jsonSchema
|
|
488
|
-
},
|
|
527
|
+
request_data,
|
|
489
528
|
headers
|
|
490
529
|
)
|
|
491
530
|
if response.status_code == 200:
|
|
firecrawl-1.8.0.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
|
|
|
1
|
+
firecrawl/__init__.py,sha256=3jDnDwAg-3SU8XRq2E8HWtJ0Umi4PLKGf4JEsR7ESig,2543
|
|
2
|
+
firecrawl/firecrawl.py,sha256=0l5WOmiy5OxEwZvgIS0TpsFx39F3X6zADjHMzg6Q8iI,31650
|
|
3
|
+
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
|
|
5
|
+
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=tL5kJJ4el37Wc-Z2TRSuSWwWG2M40h3VPxHYuWijD00,19888
|
|
7
|
+
firecrawl-1.8.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
+
firecrawl-1.8.0.dist-info/METADATA,sha256=FCEt8ZVtXgyaGc2bNXssb2AC4hLDVI-LPoa-Qa_s2cM,10631
|
|
9
|
+
firecrawl-1.8.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
+
firecrawl-1.8.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
+
firecrawl-1.8.0.dist-info/RECORD,,
|
firecrawl-1.7.0.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=tgTkQWBxVVdPVtsIRHuyzQtuCpMij1NL-JNNpZY8dNM,2543
|
|
2
|
-
firecrawl/firecrawl.py,sha256=XgDT01kF0t4TpGUvsA2HFgSg9-EGRsy0YDGkfo11Iyw,30496
|
|
3
|
-
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
|
|
5
|
-
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=Qad0xRPboRdlH6Q5o2985b4xjpjw2jr9LCik-GbXaZ0,17470
|
|
7
|
-
firecrawl-1.7.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
-
firecrawl-1.7.0.dist-info/METADATA,sha256=miIvKFogOdrN3wiPnATMJD_jmrWk-8O5FS57r9op28g,10631
|
|
9
|
-
firecrawl-1.7.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
-
firecrawl-1.7.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
-
firecrawl-1.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|