firecrawl 3.2.1__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +2 -2
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +6 -6
- firecrawl/v2/methods/search.py +11 -0
- firecrawl/v2/types.py +30 -1
- {firecrawl-3.2.1.dist-info/licenses → firecrawl-3.3.0.dist-info}/LICENSE +0 -0
- {firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/METADATA +3 -7
- {firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/RECORD +10 -10
- {firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/WHEEL +1 -1
- {firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/top_level.txt +0 -0
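firecrawl/__init__.py
CHANGED (hunk not captured in this extract)
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py
CHANGED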
|
@@ -14,7 +14,7 @@ class TestAsyncCrawlRequestPreparation:
|
|
|
14
14
|
include_paths=["/docs/*"],
|
|
15
15
|
exclude_paths=["/admin/*"],
|
|
16
16
|
max_discovery_depth=2,
|
|
17
|
-
|
|
17
|
+
sitemap="skip",
|
|
18
18
|
ignore_query_parameters=True,
|
|
19
19
|
crawl_entire_domain=True,
|
|
20
20
|
allow_external_links=False,
|
|
@@ -26,7 +26,7 @@ class TestAsyncCrawlRequestPreparation:
|
|
|
26
26
|
assert payload["includePaths"] == ["/docs/*"]
|
|
27
27
|
assert payload["excludePaths"] == ["/admin/*"]
|
|
28
28
|
assert payload["maxDiscoveryDepth"] == 2
|
|
29
|
-
assert payload["
|
|
29
|
+
assert payload["sitemap"] == "skip"
|
|
30
30
|
assert payload["ignoreQueryParameters"] is True
|
|
31
31
|
assert payload["crawlEntireDomain"] is True
|
|
32
32
|
assert payload["allowExternalLinks"] is False
|
|
firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py
CHANGED
|
@@ -24,7 +24,7 @@ class TestCrawlRequestPreparation:
|
|
|
24
24
|
url="https://example.com",
|
|
25
25
|
limit=10,
|
|
26
26
|
max_discovery_depth=3,
|
|
27
|
-
|
|
27
|
+
sitemap="skip",
|
|
28
28
|
crawl_entire_domain=False,
|
|
29
29
|
allow_external_links=True
|
|
30
30
|
)
|
|
@@ -39,8 +39,8 @@ class TestCrawlRequestPreparation:
|
|
|
39
39
|
assert data["limit"] == 10
|
|
40
40
|
assert "maxDiscoveryDepth" in data
|
|
41
41
|
assert data["maxDiscoveryDepth"] == 3
|
|
42
|
-
assert "
|
|
43
|
-
assert data["
|
|
42
|
+
assert "sitemap" in data
|
|
43
|
+
assert data["sitemap"] == "skip"
|
|
44
44
|
assert "crawlEntireDomain" in data
|
|
45
45
|
assert data["crawlEntireDomain"] is False
|
|
46
46
|
assert "allowExternalLinks" in data
|
|
@@ -106,7 +106,7 @@ class TestCrawlRequestPreparation:
|
|
|
106
106
|
include_paths=["/blog/*", "/docs/*"],
|
|
107
107
|
exclude_paths=["/admin/*"],
|
|
108
108
|
max_discovery_depth=3,
|
|
109
|
-
|
|
109
|
+
sitemap="include",
|
|
110
110
|
limit=100,
|
|
111
111
|
crawl_entire_domain=True,
|
|
112
112
|
allow_external_links=False,
|
|
@@ -126,8 +126,8 @@ class TestCrawlRequestPreparation:
|
|
|
126
126
|
assert data["excludePaths"] == ["/admin/*"]
|
|
127
127
|
assert "maxDiscoveryDepth" in data
|
|
128
128
|
assert data["maxDiscoveryDepth"] == 3
|
|
129
|
-
assert "
|
|
130
|
-
assert data["
|
|
129
|
+
assert "sitemap" in data
|
|
130
|
+
assert data["sitemap"] == "include"
|
|
131
131
|
assert "limit" in data
|
|
132
132
|
assert data["limit"] == 100
|
|
133
133
|
assert "crawlEntireDomain" in data
|
firecrawl/v2/methods/search.py
CHANGED
|
@@ -121,6 +121,17 @@ def _validate_search_request(request: SearchRequest) -> SearchRequest:
|
|
|
121
121
|
if source.type not in valid_sources:
|
|
122
122
|
raise ValueError(f"Invalid source type: {source.type}. Valid types: {valid_sources}")
|
|
123
123
|
|
|
124
|
+
# Validate categories (if provided)
|
|
125
|
+
if request.categories is not None:
|
|
126
|
+
valid_categories = {"github", "research"}
|
|
127
|
+
for category in request.categories:
|
|
128
|
+
if isinstance(category, str):
|
|
129
|
+
if category not in valid_categories:
|
|
130
|
+
raise ValueError(f"Invalid category type: {category}. Valid types: {valid_categories}")
|
|
131
|
+
elif hasattr(category, 'type'):
|
|
132
|
+
if category.type not in valid_categories:
|
|
133
|
+
raise ValueError(f"Invalid category type: {category.type}. Valid types: {valid_categories}")
|
|
134
|
+
|
|
124
135
|
# Validate location (if provided)
|
|
125
136
|
if request.location is not None:
|
|
126
137
|
if not isinstance(request.location, str) or len(request.location.strip()) == 0:
|
firecrawl/v2/types.py
CHANGED
|
@@ -174,6 +174,12 @@ class Source(BaseModel):
|
|
|
174
174
|
|
|
175
175
|
SourceOption = Union[str, Source]
|
|
176
176
|
|
|
177
|
+
class Category(BaseModel):
|
|
178
|
+
"""Configuration for a search category."""
|
|
179
|
+
type: str
|
|
180
|
+
|
|
181
|
+
CategoryOption = Union[str, Category]
|
|
182
|
+
|
|
177
183
|
FormatString = Literal[
|
|
178
184
|
# camelCase versions (API format)
|
|
179
185
|
"markdown", "html", "rawHtml", "links", "screenshot", "summary", "changeTracking", "json",
|
|
@@ -331,7 +337,8 @@ class SearchResultWeb(BaseModel):
|
|
|
331
337
|
"""A web search result with URL, title, and description."""
|
|
332
338
|
url: str
|
|
333
339
|
title: Optional[str] = None
|
|
334
|
-
description: Optional[str] = None
|
|
340
|
+
description: Optional[str] = None
|
|
341
|
+
category: Optional[str] = None
|
|
335
342
|
|
|
336
343
|
class SearchResultNews(BaseModel):
|
|
337
344
|
"""A news search result with URL, title, snippet, date, image URL, and position."""
|
|
@@ -341,6 +348,7 @@ class SearchResultNews(BaseModel):
|
|
|
341
348
|
date: Optional[str] = None
|
|
342
349
|
image_url: Optional[str] = None
|
|
343
350
|
position: Optional[int] = None
|
|
351
|
+
category: Optional[str] = None
|
|
344
352
|
|
|
345
353
|
class SearchResultImages(BaseModel):
|
|
346
354
|
"""An image search result with URL, title, image URL, image width, image height, and position."""
|
|
@@ -521,6 +529,7 @@ class SearchRequest(BaseModel):
|
|
|
521
529
|
"""Request for search operations."""
|
|
522
530
|
query: str
|
|
523
531
|
sources: Optional[List[SourceOption]] = None
|
|
532
|
+
categories: Optional[List[CategoryOption]] = None
|
|
524
533
|
limit: Optional[int] = 5
|
|
525
534
|
tbs: Optional[str] = None
|
|
526
535
|
location: Optional[str] = None
|
|
@@ -547,6 +556,26 @@ class SearchRequest(BaseModel):
|
|
|
547
556
|
raise ValueError(f"Invalid source format: {source}")
|
|
548
557
|
|
|
549
558
|
return normalized_sources
|
|
559
|
+
|
|
560
|
+
@field_validator('categories')
|
|
561
|
+
@classmethod
|
|
562
|
+
def validate_categories(cls, v):
|
|
563
|
+
"""Validate and normalize categories input."""
|
|
564
|
+
if v is None:
|
|
565
|
+
return v
|
|
566
|
+
|
|
567
|
+
normalized_categories = []
|
|
568
|
+
for category in v:
|
|
569
|
+
if isinstance(category, str):
|
|
570
|
+
normalized_categories.append(Category(type=category))
|
|
571
|
+
elif isinstance(category, dict):
|
|
572
|
+
normalized_categories.append(Category(**category))
|
|
573
|
+
elif isinstance(category, Category):
|
|
574
|
+
normalized_categories.append(category)
|
|
575
|
+
else:
|
|
576
|
+
raise ValueError(f"Invalid category format: {category}")
|
|
577
|
+
|
|
578
|
+
return normalized_categories
|
|
550
579
|
|
|
551
580
|
class LinkResult(BaseModel):
|
|
552
581
|
"""A generic link result with optional metadata (used by search and map)."""
|
|
File without changes
|
|
{firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 3.2.1
|
|
3
|
+
Version: 3.3.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/firecrawl/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -38,12 +38,8 @@ Requires-Dist: httpx
|
|
|
38
38
|
Requires-Dist: python-dotenv
|
|
39
39
|
Requires-Dist: websockets
|
|
40
40
|
Requires-Dist: nest-asyncio
|
|
41
|
-
Requires-Dist: pydantic>=2.0
|
|
41
|
+
Requires-Dist: pydantic (>=2.0)
|
|
42
42
|
Requires-Dist: aiohttp
|
|
43
|
-
Dynamic: author
|
|
44
|
-
Dynamic: home-page
|
|
45
|
-
Dynamic: license-file
|
|
46
|
-
Dynamic: requires-python
|
|
47
43
|
|
|
48
44
|
# Firecrawl Python SDK
|
|
49
45
|
|
|
{firecrawl-3.2.1.dist-info → firecrawl-3.3.0.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=B7SJY_289oeNIVIPxklkrqFQQURmszzwEqFjYXDfd20,2192
|
|
2
2
|
firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
|
|
3
3
|
firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
|
|
4
4
|
firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
|
|
@@ -22,7 +22,7 @@ firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpM
|
|
|
22
22
|
firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
|
|
23
23
|
firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
|
|
24
24
|
firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
|
|
25
|
-
firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=
|
|
25
|
+
firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
|
|
26
26
|
firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
|
|
27
27
|
firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
|
|
28
28
|
firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
|
|
@@ -31,7 +31,7 @@ firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCp
|
|
|
31
31
|
firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
|
|
32
32
|
firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
|
|
33
33
|
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
|
|
34
|
-
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=
|
|
34
|
+
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=RkIKt7uxBzVhAkrLQwXYjmC-9sj32SUNQrJZgF2WEMs,2565
|
|
35
35
|
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
|
|
36
36
|
firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=PdUJrR0JLWqrithAnRXwuRrnsIN2h_DTu6-xvTOn_UU,725
|
|
37
37
|
firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
|
|
@@ -45,7 +45,7 @@ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
|
|
|
45
45
|
firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
|
|
46
46
|
firecrawl/v2/client.py,sha256=P6WAzwYGLLIANTrqAM-K4EUdGWQoFsi-zCjBibbxKQw,30507
|
|
47
47
|
firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
|
|
48
|
-
firecrawl/v2/types.py,sha256=
|
|
48
|
+
firecrawl/v2/types.py,sha256=rBdTaTQmb1SmdR8O0GvA_gCfBG-QCtBOrMsFgA9Usms,22114
|
|
49
49
|
firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
|
|
50
50
|
firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
|
|
51
51
|
firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
|
|
@@ -53,7 +53,7 @@ firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8
|
|
|
53
53
|
firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
|
|
54
54
|
firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
|
|
55
55
|
firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
|
|
56
|
-
firecrawl/v2/methods/search.py,sha256=
|
|
56
|
+
firecrawl/v2/methods/search.py,sha256=6BKiQ1aKJjWBKm9BBtKxFKGD74kCKBeMIp_OgjcDFAw,7673
|
|
57
57
|
firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
|
|
58
58
|
firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
|
|
59
59
|
firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
|
|
@@ -70,10 +70,10 @@ firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg
|
|
|
70
70
|
firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
|
|
71
71
|
firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
|
|
72
72
|
firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
|
|
73
|
-
firecrawl-3.2.1.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
74
73
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
75
74
|
tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
|
|
76
|
-
firecrawl-3.
|
|
77
|
-
firecrawl-3.
|
|
78
|
-
firecrawl-3.
|
|
79
|
-
firecrawl-3.
|
|
75
|
+
firecrawl-3.3.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
76
|
+
firecrawl-3.3.0.dist-info/METADATA,sha256=Yk4-i0M6LdZdXoOQgdXyaclEX695WhU0JjhfdPePg9w,7313
|
|
77
|
+
firecrawl-3.3.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
78
|
+
firecrawl-3.3.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
79
|
+
firecrawl-3.3.0.dist-info/RECORD,,
|
|
File without changes
|