firecrawl 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
15
15
 
16
- __version__ = "2.9.0"
16
+ __version__ = "2.10.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -437,4 +437,29 @@ def test_search_with_invalid_params():
437
437
  app.search("test query", {"invalid_param": "value"})
438
438
  assert "ValidationError" in str(e.value)
439
439
 
440
+ # def test_scrape_url_with_parse_pdf_true():
441
+ # if TEST_API_KEY:
442
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
443
+ # response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf', parse_pdf=True)
444
+ # assert response is not None
445
+ # assert 'markdown' in response
446
+ # assert len(response['markdown']) > 100
447
+
448
+ # def test_scrape_url_with_parse_pdf_false():
449
+ # if TEST_API_KEY:
450
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
451
+ # response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf', parse_pdf=False)
452
+ # assert response is not None
453
+ # assert 'markdown' in response
454
+ # assert 'h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm' in response['markdown']
455
+
456
+ # def test_scrape_options_with_parse_pdf():
457
+ # if TEST_API_KEY:
458
+ # from firecrawl.firecrawl import ScrapeOptions
459
+ # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
460
+ # scrape_options = ScrapeOptions(parsePDF=False, formats=['markdown'])
461
+ # response = app.search("firecrawl", limit=1, scrape_options=scrape_options)
462
+ # assert response is not None
463
+ # assert 'data' in response
464
+
440
465
 
firecrawl/firecrawl.py CHANGED
@@ -160,6 +160,7 @@ class ScrapeOptions(pydantic.BaseModel):
160
160
  changeTrackingOptions: Optional[ChangeTrackingOptions] = None
161
161
  maxAge: Optional[int] = None
162
162
  storeInCache: Optional[bool] = None
163
+ parsePDF: Optional[bool] = None
163
164
 
164
165
  class WaitAction(pydantic.BaseModel):
165
166
  """Wait action to perform during scraping."""
@@ -465,6 +466,7 @@ class FirecrawlApp:
465
466
  remove_base64_images: Optional[bool] = None,
466
467
  block_ads: Optional[bool] = None,
467
468
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
469
+ parse_pdf: Optional[bool] = None,
468
470
  extract: Optional[JsonConfig] = None,
469
471
  json_options: Optional[JsonConfig] = None,
470
472
  actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
@@ -538,6 +540,8 @@ class FirecrawlApp:
538
540
  scrape_params['blockAds'] = block_ads
539
541
  if proxy:
540
542
  scrape_params['proxy'] = proxy
543
+ if parse_pdf is not None:
544
+ scrape_params['parsePDF'] = parse_pdf
541
545
  if extract is not None:
542
546
  extract = self._ensure_schema_dict(extract)
543
547
  if isinstance(extract, dict) and "schema" in extract:
@@ -2904,6 +2908,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
2904
2908
  remove_base64_images: Optional[bool] = None,
2905
2909
  block_ads: Optional[bool] = None,
2906
2910
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
2911
+ parse_pdf: Optional[bool] = None,
2907
2912
  extract: Optional[JsonConfig] = None,
2908
2913
  json_options: Optional[JsonConfig] = None,
2909
2914
  actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
@@ -2981,6 +2986,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
2981
2986
  scrape_params['blockAds'] = block_ads
2982
2987
  if proxy:
2983
2988
  scrape_params['proxy'] = proxy
2989
+ if parse_pdf is not None:
2990
+ scrape_params['parsePDF'] = parse_pdf
2984
2991
  if extract is not None:
2985
2992
  extract = self._ensure_schema_dict(extract)
2986
2993
  if isinstance(extract, dict) and "schema" in extract:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.9.0
3
+ Version: 2.10.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -0,0 +1,12 @@
1
+ firecrawl/__init__.py,sha256=qDOTVOIN0WXrkEEWPqy2UfFzbNDbimvD7HOPhXvTkC4,2613
2
+ firecrawl/firecrawl.py,sha256=Bi7n0U94YJicUYnbjKKOmbkrpWh-kSe1ttPpil3rZl4,193869
3
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
5
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=k9IsEbdTHL9Cu49M4FpnQDEo2rnG6RqwmZAsK_EVJr4,21069
7
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
8
+ firecrawl-2.10.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
9
+ firecrawl-2.10.0.dist-info/METADATA,sha256=r0ytUZrMwcrvFMIUB6J7yG7LI8Lz6GszkwFvGMF2nms,7166
10
+ firecrawl-2.10.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
11
+ firecrawl-2.10.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
12
+ firecrawl-2.10.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- firecrawl/__init__.py,sha256=nWGTmoKRj6qHs3mzjKE4d3giXsTeeXIO-Ujw0S0oy7k,2612
2
- firecrawl/firecrawl.py,sha256=ICCfDvhpsV3OT5kwwuiS2_6tiq9kmCca4Elum7mKhxg,193573
3
- firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
5
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
7
- tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
8
- firecrawl-2.9.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
9
- firecrawl-2.9.0.dist-info/METADATA,sha256=7V6RGueUF-gnebxMeXVW6Lpc22vcRyU8Fe6xa58Ep7Q,7165
10
- firecrawl-2.9.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
11
- firecrawl-2.9.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
12
- firecrawl-2.9.0.dist-info/RECORD,,