firecrawl-1.7.0-py3-none-any.whl → firecrawl-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.7.0"
16
+ __version__ = "1.8.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/__tests__/v1/e2e_withAuth/test.py CHANGED
@@ -371,4 +371,70 @@ def test_search_e2e():
371
371
  # assert isinstance(llm_extraction['supports_sso'], bool)
372
372
  # assert isinstance(llm_extraction['is_open_source'], bool)
373
373
 
374
+ def test_search_with_string_query():
375
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
376
+ response = app.search("firecrawl")
377
+ assert response["success"] is True
378
+ assert len(response["data"]) > 0
379
+ assert response["data"][0]["markdown"] is not None
380
+ assert response["data"][0]["metadata"] is not None
381
+ assert response["data"][0]["metadata"]["title"] is not None
382
+ assert response["data"][0]["metadata"]["description"] is not None
383
+
384
+ def test_search_with_params_dict():
385
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
386
+ response = app.search("firecrawl", {
387
+ "limit": 3,
388
+ "lang": "en",
389
+ "country": "us",
390
+ "scrapeOptions": {
391
+ "formats": ["markdown", "html", "links"],
392
+ "onlyMainContent": True
393
+ }
394
+ })
395
+ assert response["success"] is True
396
+ assert len(response["data"]) <= 3
397
+ for doc in response["data"]:
398
+ assert doc["markdown"] is not None
399
+ assert doc["html"] is not None
400
+ assert doc["links"] is not None
401
+ assert doc["metadata"] is not None
402
+ assert doc["metadata"]["title"] is not None
403
+ assert doc["metadata"]["description"] is not None
404
+
405
+ def test_search_with_params_object():
406
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
407
+ params = SearchParams(
408
+ query="firecrawl",
409
+ limit=3,
410
+ lang="en",
411
+ country="us",
412
+ scrapeOptions={
413
+ "formats": ["markdown", "html", "links"],
414
+ "onlyMainContent": True
415
+ }
416
+ )
417
+ response = app.search(params.query, params)
418
+ assert response["success"] is True
419
+ assert len(response["data"]) <= 3
420
+ for doc in response["data"]:
421
+ assert doc["markdown"] is not None
422
+ assert doc["html"] is not None
423
+ assert doc["links"] is not None
424
+ assert doc["metadata"] is not None
425
+ assert doc["metadata"]["title"] is not None
426
+ assert doc["metadata"]["description"] is not None
427
+
428
+ def test_search_invalid_api_key():
429
+ app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
430
+ with pytest.raises(Exception) as e:
431
+ app.search("test query")
432
+ assert "404" in str(e.value)
433
+
434
+ def test_search_with_invalid_params():
435
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
436
+ with pytest.raises(Exception) as e:
437
+ app.search("test query", {"invalid_param": "value"})
438
+ assert "ValidationError" in str(e.value)
439
+
374
440
 
firecrawl/firecrawl.py CHANGED
@@ -21,7 +21,28 @@ import websockets
21
21
 
22
22
  logger : logging.Logger = logging.getLogger("firecrawl")
23
23
 
24
+ class SearchParams(pydantic.BaseModel):
25
+ query: str
26
+ limit: Optional[int] = 5
27
+ tbs: Optional[str] = None
28
+ filter: Optional[str] = None
29
+ lang: Optional[str] = "en"
30
+ country: Optional[str] = "us"
31
+ location: Optional[str] = None
32
+ origin: Optional[str] = "api"
33
+ timeout: Optional[int] = 60000
34
+ scrapeOptions: Optional[Dict[str, Any]] = None
35
+
24
36
  class FirecrawlApp:
37
+ class SearchResponse(pydantic.BaseModel):
38
+ """
39
+ Response from the search operation.
40
+ """
41
+ success: bool
42
+ data: List[Dict[str, Any]]
43
+ warning: Optional[str] = None
44
+ error: Optional[str] = None
45
+
25
46
  class ExtractParams(pydantic.BaseModel):
26
47
  """
27
48
  Parameters for the extract operation.
@@ -109,22 +130,36 @@ class FirecrawlApp:
109
130
  else:
110
131
  self._handle_error(response, 'scrape URL')
111
132
 
112
- def search(self, query: str, params: Optional[Dict[str, Any]] = None) -> Any:
133
+ def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> Dict[str, Any]:
113
134
  """
114
- Perform a search using the Firecrawl API.
135
+ Search for content using the Firecrawl API.
115
136
 
116
137
  Args:
117
- query (str): The search query.
118
- params (Optional[Dict[str, Any]]): Additional parameters for the search request.
138
+ query (str): The search query string.
139
+ params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters.
119
140
 
120
141
  Returns:
121
- Any: The search results if the request is successful.
122
-
123
- Raises:
124
- NotImplementedError: If the search request is attempted on API version v1.
125
- Exception: If the search request fails.
142
+ Dict[str, Any]: The search response containing success status and search results.
126
143
  """
127
- raise NotImplementedError("Search is not supported in v1.")
144
+ if params is None:
145
+ params = {}
146
+
147
+ if isinstance(params, dict):
148
+ search_params = SearchParams(query=query, **params)
149
+ else:
150
+ search_params = params
151
+ search_params.query = query
152
+
153
+ response = requests.post(
154
+ f"{self.api_url}/v1/search",
155
+ headers={"Authorization": f"Bearer {self.api_key}"},
156
+ json=search_params.dict(exclude_none=True)
157
+ )
158
+
159
+ if response.status_code != 200:
160
+ raise Exception(f"Request failed with status code {response.status_code}")
161
+
162
+ return response.json()
128
163
 
129
164
  def crawl_url(self, url: str,
130
165
  params: Optional[Dict[str, Any]] = None,
@@ -472,20 +507,24 @@ class FirecrawlApp:
472
507
  if not params or not params.get('prompt'):
473
508
  raise ValueError("Prompt is required")
474
509
 
475
- if not params.get('schema'):
476
- raise ValueError("Schema is required for extraction")
510
+ schema = params.get('schema')
511
+ if schema:
512
+ if hasattr(schema, 'model_json_schema'):
513
+ # Convert Pydantic model to JSON schema
514
+ schema = schema.model_json_schema()
515
+ # Otherwise assume it's already a JSON schema dict
477
516
 
478
517
  jsonData = {'urls': urls, **params}
479
- jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None
518
+ request_data = {
519
+ **jsonData,
520
+ 'allowExternalLinks': params.get('allow_external_links', False),
521
+ 'schema': schema
522
+ }
480
523
 
481
524
  try:
482
525
  response = self._post_request(
483
526
  f'{self.api_url}/v1/extract',
484
- {
485
- **jsonData,
486
- 'allowExternalLinks': params.get('allow_external_links', False),
487
- 'schema': jsonSchema
488
- },
527
+ request_data,
489
528
  headers
490
529
  )
491
530
  if response.status_code == 200:
firecrawl-1.7.0.dist-info/METADATA → firecrawl-1.8.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
firecrawl-1.8.0.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
1
+ firecrawl/__init__.py,sha256=3jDnDwAg-3SU8XRq2E8HWtJ0Umi4PLKGf4JEsR7ESig,2543
2
+ firecrawl/firecrawl.py,sha256=0l5WOmiy5OxEwZvgIS0TpsFx39F3X6zADjHMzg6Q8iI,31650
3
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
5
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=tL5kJJ4el37Wc-Z2TRSuSWwWG2M40h3VPxHYuWijD00,19888
7
+ firecrawl-1.8.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
8
+ firecrawl-1.8.0.dist-info/METADATA,sha256=FCEt8ZVtXgyaGc2bNXssb2AC4hLDVI-LPoa-Qa_s2cM,10631
9
+ firecrawl-1.8.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
10
+ firecrawl-1.8.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
11
+ firecrawl-1.8.0.dist-info/RECORD,,
firecrawl-1.7.0.dist-info/RECORD REMOVED
@@ -1,11 +0,0 @@
1
- firecrawl/__init__.py,sha256=tgTkQWBxVVdPVtsIRHuyzQtuCpMij1NL-JNNpZY8dNM,2543
2
- firecrawl/firecrawl.py,sha256=XgDT01kF0t4TpGUvsA2HFgSg9-EGRsy0YDGkfo11Iyw,30496
3
- firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
5
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=Qad0xRPboRdlH6Q5o2985b4xjpjw2jr9LCik-GbXaZ0,17470
7
- firecrawl-1.7.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
8
- firecrawl-1.7.0.dist-info/METADATA,sha256=miIvKFogOdrN3wiPnATMJD_jmrWk-8O5FS57r9op28g,10631
9
- firecrawl-1.7.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
10
- firecrawl-1.7.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
11
- firecrawl-1.7.0.dist-info/RECORD,,