firecrawl: firecrawl-4.0.0-py3-none-any.whl → firecrawl-4.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/v2/client.py +4 -3
- firecrawl/v2/types.py +27 -2
- firecrawl/v2/utils/validation.py +15 -1
- {firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/METADATA +1 -1
- {firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/RECORD +9 -9
- {firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/WHEEL +0 -0
- {firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/licenses/LICENSE +0 -0
- {firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
firecrawl/v2/client.py
CHANGED
|
@@ -18,6 +18,7 @@ from .types import (
|
|
|
18
18
|
CrawlResponse,
|
|
19
19
|
CrawlJob,
|
|
20
20
|
CrawlParamsRequest,
|
|
21
|
+
PDFParser,
|
|
21
22
|
CrawlParamsData,
|
|
22
23
|
WebhookConfig,
|
|
23
24
|
CrawlErrorsResponse,
|
|
@@ -105,7 +106,7 @@ class FirecrawlClient:
|
|
|
105
106
|
timeout: Optional[int] = None,
|
|
106
107
|
wait_for: Optional[int] = None,
|
|
107
108
|
mobile: Optional[bool] = None,
|
|
108
|
-
parsers: Optional[List[str]] = None,
|
|
109
|
+
parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
|
|
109
110
|
actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
|
|
110
111
|
location: Optional['Location'] = None,
|
|
111
112
|
skip_tls_verification: Optional[bool] = None,
|
|
@@ -571,7 +572,7 @@ class FirecrawlClient:
|
|
|
571
572
|
timeout: Optional[int] = None,
|
|
572
573
|
wait_for: Optional[int] = None,
|
|
573
574
|
mobile: Optional[bool] = None,
|
|
574
|
-
parsers: Optional[List[str]] = None,
|
|
575
|
+
parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
|
|
575
576
|
actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
|
|
576
577
|
location: Optional['Location'] = None,
|
|
577
578
|
skip_tls_verification: Optional[bool] = None,
|
|
@@ -759,7 +760,7 @@ class FirecrawlClient:
|
|
|
759
760
|
timeout: Optional[int] = None,
|
|
760
761
|
wait_for: Optional[int] = None,
|
|
761
762
|
mobile: Optional[bool] = None,
|
|
762
|
-
parsers: Optional[List[str]] = None,
|
|
763
|
+
parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
|
|
763
764
|
actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
|
|
764
765
|
location: Optional['Location'] = None,
|
|
765
766
|
skip_tls_verification: Optional[bool] = None,
|
firecrawl/v2/types.py
CHANGED
|
@@ -278,7 +278,7 @@ class ScrapeOptions(BaseModel):
|
|
|
278
278
|
timeout: Optional[int] = None
|
|
279
279
|
wait_for: Optional[int] = None
|
|
280
280
|
mobile: Optional[bool] = None
|
|
281
|
-
parsers: Optional[List[str]] = None
|
|
281
|
+
parsers: Optional[Union[List[str], List[Union[str, 'PDFParser']]]] = None
|
|
282
282
|
actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None
|
|
283
283
|
location: Optional['Location'] = None
|
|
284
284
|
skip_tls_verification: Optional[bool] = None
|
|
@@ -536,6 +536,11 @@ class PDFAction(BaseModel):
|
|
|
536
536
|
landscape: Optional[bool] = None
|
|
537
537
|
scale: Optional[float] = None
|
|
538
538
|
|
|
539
|
+
class PDFParser(BaseModel):
|
|
540
|
+
"""PDF parser configuration with optional page limit."""
|
|
541
|
+
type: Literal["pdf"] = "pdf"
|
|
542
|
+
max_pages: Optional[int] = None
|
|
543
|
+
|
|
539
544
|
# Location types
|
|
540
545
|
class Location(BaseModel):
|
|
541
546
|
"""Location configuration for scraping."""
|
|
@@ -594,6 +599,26 @@ class SearchRequest(BaseModel):
|
|
|
594
599
|
|
|
595
600
|
return normalized_categories
|
|
596
601
|
|
|
602
|
+
@field_validator('parsers')
|
|
603
|
+
@classmethod
|
|
604
|
+
def validate_parsers(cls, v):
|
|
605
|
+
"""Validate and normalize parsers input."""
|
|
606
|
+
if v is None:
|
|
607
|
+
return v
|
|
608
|
+
|
|
609
|
+
normalized_parsers = []
|
|
610
|
+
for parser in v:
|
|
611
|
+
if isinstance(parser, str):
|
|
612
|
+
normalized_parsers.append(parser)
|
|
613
|
+
elif isinstance(parser, dict):
|
|
614
|
+
normalized_parsers.append(PDFParser(**parser))
|
|
615
|
+
elif isinstance(parser, PDFParser):
|
|
616
|
+
normalized_parsers.append(parser)
|
|
617
|
+
else:
|
|
618
|
+
raise ValueError(f"Invalid parser format: {parser}")
|
|
619
|
+
|
|
620
|
+
return normalized_parsers
|
|
621
|
+
|
|
597
622
|
class LinkResult(BaseModel):
|
|
598
623
|
"""A generic link result with optional metadata (used by search and map)."""
|
|
599
624
|
url: str
|
|
@@ -686,4 +711,4 @@ AnyResponse = Union[
|
|
|
686
711
|
MapResponse,
|
|
687
712
|
SearchResponse,
|
|
688
713
|
ErrorResponse,
|
|
689
|
-
]
|
|
714
|
+
]
|
firecrawl/v2/utils/validation.py
CHANGED
|
@@ -311,6 +311,20 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
|
|
|
311
311
|
converted_action[action_key] = action_value
|
|
312
312
|
converted_actions.append(converted_action)
|
|
313
313
|
scrape_data["actions"] = converted_actions
|
|
314
|
+
elif key == "parsers":
|
|
315
|
+
converted_parsers = []
|
|
316
|
+
for parser in value:
|
|
317
|
+
if isinstance(parser, str):
|
|
318
|
+
converted_parsers.append(parser)
|
|
319
|
+
elif isinstance(parser, dict):
|
|
320
|
+
converted_parsers.append(parser)
|
|
321
|
+
else:
|
|
322
|
+
parser_data = parser.model_dump(exclude_none=True)
|
|
323
|
+
# Convert snake_case to camelCase for API
|
|
324
|
+
if "max_pages" in parser_data:
|
|
325
|
+
parser_data["maxPages"] = parser_data.pop("max_pages")
|
|
326
|
+
converted_parsers.append(parser_data)
|
|
327
|
+
scrape_data["parsers"] = converted_parsers
|
|
314
328
|
elif key == "location":
|
|
315
329
|
# Handle location conversion
|
|
316
330
|
if isinstance(value, dict):
|
|
@@ -321,4 +335,4 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
|
|
|
321
335
|
# For fields that don't need conversion, use as-is
|
|
322
336
|
scrape_data[key] = value
|
|
323
337
|
|
|
324
|
-
return scrape_data
|
|
338
|
+
return scrape_data
|
{firecrawl-4.0.0.dist-info → firecrawl-4.1.0.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=BT5Sx5lBRhzEEFNivjIhZaBA8kAeZCPFixbO9n8Myxw,2192
|
|
2
2
|
firecrawl/client.py,sha256=tp3mUo_3aGPuZ53kpU4bhM-5EtwD_IUWrJ7wm0GMuCc,11159
|
|
3
3
|
firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
|
|
4
4
|
firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
|
|
@@ -44,9 +44,9 @@ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4
|
|
|
44
44
|
firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
|
|
45
45
|
firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
|
|
46
46
|
firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
|
|
47
|
-
firecrawl/v2/client.py,sha256=
|
|
47
|
+
firecrawl/v2/client.py,sha256=AMAHQ8Uz9bsEIy2vmIDNNUIT0FOivhLyj6lesEr1Rbg,31260
|
|
48
48
|
firecrawl/v2/client_async.py,sha256=XyzojIJlWatBGlAMish22H-XHkkH9zHsD6MGtAdtFg8,10487
|
|
49
|
-
firecrawl/v2/types.py,sha256=
|
|
49
|
+
firecrawl/v2/types.py,sha256=YnbEmskB4eyfSIBDQK_Kv6xNjID3McagGEoIUiIIbL8,23840
|
|
50
50
|
firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
|
|
51
51
|
firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
|
|
52
52
|
firecrawl/v2/methods/batch.py,sha256=jFSIPtvulUrPz3Y3zT1gDNwYEf8Botpfh4GOeYsVYRI,14852
|
|
@@ -70,11 +70,11 @@ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcI
|
|
|
70
70
|
firecrawl/v2/utils/http_client.py,sha256=gUrC1CvU5sj03w27Lbq-3-yH38Yi_OXiI01-piwA83w,6027
|
|
71
71
|
firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
|
|
72
72
|
firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
|
|
73
|
-
firecrawl/v2/utils/validation.py,sha256=
|
|
74
|
-
firecrawl-4.
|
|
73
|
+
firecrawl/v2/utils/validation.py,sha256=qWWiWaVcvODmVxf9rxIVy1j_dyuJCvdMMUoYhvWUEIU,15269
|
|
74
|
+
firecrawl-4.1.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
75
75
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
76
76
|
tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
|
|
77
|
-
firecrawl-4.
|
|
78
|
-
firecrawl-4.
|
|
79
|
-
firecrawl-4.
|
|
80
|
-
firecrawl-4.
|
|
77
|
+
firecrawl-4.1.0.dist-info/METADATA,sha256=IFmGv9ChnRIoKksvP9RebpAcqm8c5-SHZhN8LrF04l4,7392
|
|
78
|
+
firecrawl-4.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
79
|
+
firecrawl-4.1.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
80
|
+
firecrawl-4.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|