firecrawl 4.0.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
17
17
  V1ChangeTrackingOptions,
18
18
  )
19
19
 
20
- __version__ = "4.0.0"
20
+ __version__ = "4.1.1"
21
21
 
22
22
  # Define the logger for the Firecrawl project
23
23
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/v2/client.py CHANGED
@@ -18,6 +18,7 @@ from .types import (
18
18
  CrawlResponse,
19
19
  CrawlJob,
20
20
  CrawlParamsRequest,
21
+ PDFParser,
21
22
  CrawlParamsData,
22
23
  WebhookConfig,
23
24
  CrawlErrorsResponse,
@@ -105,7 +106,7 @@ class FirecrawlClient:
105
106
  timeout: Optional[int] = None,
106
107
  wait_for: Optional[int] = None,
107
108
  mobile: Optional[bool] = None,
108
- parsers: Optional[List[str]] = None,
109
+ parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
109
110
  actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
110
111
  location: Optional['Location'] = None,
111
112
  skip_tls_verification: Optional[bool] = None,
@@ -571,7 +572,7 @@ class FirecrawlClient:
571
572
  timeout: Optional[int] = None,
572
573
  wait_for: Optional[int] = None,
573
574
  mobile: Optional[bool] = None,
574
- parsers: Optional[List[str]] = None,
575
+ parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
575
576
  actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
576
577
  location: Optional['Location'] = None,
577
578
  skip_tls_verification: Optional[bool] = None,
@@ -759,7 +760,7 @@ class FirecrawlClient:
759
760
  timeout: Optional[int] = None,
760
761
  wait_for: Optional[int] = None,
761
762
  mobile: Optional[bool] = None,
762
- parsers: Optional[List[str]] = None,
763
+ parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
763
764
  actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
764
765
  location: Optional['Location'] = None,
765
766
  skip_tls_verification: Optional[bool] = None,
firecrawl/v2/types.py CHANGED
@@ -278,7 +278,7 @@ class ScrapeOptions(BaseModel):
278
278
  timeout: Optional[int] = None
279
279
  wait_for: Optional[int] = None
280
280
  mobile: Optional[bool] = None
281
- parsers: Optional[List[str]] = None
281
+ parsers: Optional[Union[List[str], List[Union[str, 'PDFParser']]]] = None
282
282
  actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None
283
283
  location: Optional['Location'] = None
284
284
  skip_tls_verification: Optional[bool] = None
@@ -536,6 +536,11 @@ class PDFAction(BaseModel):
536
536
  landscape: Optional[bool] = None
537
537
  scale: Optional[float] = None
538
538
 
539
+ class PDFParser(BaseModel):
540
+ """PDF parser configuration with optional page limit."""
541
+ type: Literal["pdf"] = "pdf"
542
+ max_pages: Optional[int] = None
543
+
539
544
  # Location types
540
545
  class Location(BaseModel):
541
546
  """Location configuration for scraping."""
@@ -594,6 +599,8 @@ class SearchRequest(BaseModel):
594
599
 
595
600
  return normalized_categories
596
601
 
602
+ # NOTE: parsers validation does not belong on SearchRequest; it is part of ScrapeOptions.
603
+
597
604
  class LinkResult(BaseModel):
598
605
  """A generic link result with optional metadata (used by search and map)."""
599
606
  url: str
@@ -686,4 +693,4 @@ AnyResponse = Union[
686
693
  MapResponse,
687
694
  SearchResponse,
688
695
  ErrorResponse,
689
- ]
696
+ ]
firecrawl/v2/utils/validation.py CHANGED
@@ -311,6 +311,20 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
311
311
  converted_action[action_key] = action_value
312
312
  converted_actions.append(converted_action)
313
313
  scrape_data["actions"] = converted_actions
314
+ elif key == "parsers":
315
+ converted_parsers = []
316
+ for parser in value:
317
+ if isinstance(parser, str):
318
+ converted_parsers.append(parser)
319
+ elif isinstance(parser, dict):
320
+ converted_parsers.append(parser)
321
+ else:
322
+ parser_data = parser.model_dump(exclude_none=True)
323
+ # Convert snake_case to camelCase for API
324
+ if "max_pages" in parser_data:
325
+ parser_data["maxPages"] = parser_data.pop("max_pages")
326
+ converted_parsers.append(parser_data)
327
+ scrape_data["parsers"] = converted_parsers
314
328
  elif key == "location":
315
329
  # Handle location conversion
316
330
  if isinstance(value, dict):
@@ -321,4 +335,4 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
321
335
  # For fields that don't need conversion, use as-is
322
336
  scrape_data[key] = value
323
337
 
324
- return scrape_data
338
+ return scrape_data
firecrawl-4.1.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: firecrawl
3
- Version: 4.0.0
3
+ Version: 4.1.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/firecrawl/firecrawl
6
6
  Author: Mendable.ai
firecrawl-4.1.1.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- firecrawl/__init__.py,sha256=Fvnels2_mk87sESx-1GhfwBGCrRsN05RoUHsu4AswrY,2192
1
+ firecrawl/__init__.py,sha256=PTxLZcB2UvYQVIzsA-XrGsaQamCdRO4yU96eKWMEwIs,2192
2
2
  firecrawl/client.py,sha256=tp3mUo_3aGPuZ53kpU4bhM-5EtwD_IUWrJ7wm0GMuCc,11159
3
3
  firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
4
4
  firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
@@ -44,9 +44,9 @@ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4
44
44
  firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
45
45
  firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
46
46
  firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
47
- firecrawl/v2/client.py,sha256=aD4SDVKhh5glgzdnJ8JvCPCW_u8pv0BAfIg2Wffvjao,31137
47
+ firecrawl/v2/client.py,sha256=AMAHQ8Uz9bsEIy2vmIDNNUIT0FOivhLyj6lesEr1Rbg,31260
48
48
  firecrawl/v2/client_async.py,sha256=XyzojIJlWatBGlAMish22H-XHkkH9zHsD6MGtAdtFg8,10487
49
- firecrawl/v2/types.py,sha256=RnFZf9CXBS3XkeB74L48sxWZ_ECZht4gVOKtyQLZz0o,22973
49
+ firecrawl/v2/types.py,sha256=aD_q4wVUksZKyKifYn1lbNgZeSPAZjAIxa1lPwEKckU,23266
50
50
  firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
51
51
  firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
52
52
  firecrawl/v2/methods/batch.py,sha256=jFSIPtvulUrPz3Y3zT1gDNwYEf8Botpfh4GOeYsVYRI,14852
@@ -70,11 +70,11 @@ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcI
70
70
  firecrawl/v2/utils/http_client.py,sha256=gUrC1CvU5sj03w27Lbq-3-yH38Yi_OXiI01-piwA83w,6027
71
71
  firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
72
72
  firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
73
- firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
74
- firecrawl-4.0.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
73
+ firecrawl/v2/utils/validation.py,sha256=qWWiWaVcvODmVxf9rxIVy1j_dyuJCvdMMUoYhvWUEIU,15269
74
+ firecrawl-4.1.1.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
75
75
  tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
76
76
  tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
77
- firecrawl-4.0.0.dist-info/METADATA,sha256=axsFmxpij8pHewgXTyRI3s5Rj96KoRG0Gxf4zCm11RY,7392
78
- firecrawl-4.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- firecrawl-4.0.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
80
- firecrawl-4.0.0.dist-info/RECORD,,
77
+ firecrawl-4.1.1.dist-info/METADATA,sha256=D6H49PROhKlcEYd-216G5QQ1DlXLutq6SQv876cKivg,7392
78
+ firecrawl-4.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
+ firecrawl-4.1.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
80
+ firecrawl-4.1.1.dist-info/RECORD,,