firecrawl 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
17
17
  V1ChangeTrackingOptions,
18
18
  )
19
19
 
20
- __version__ = "3.2.0"
20
+ __version__ = "3.3.0"
21
21
 
22
22
  # Define the logger for the Firecrawl project
23
23
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -14,7 +14,7 @@ class TestAsyncCrawlRequestPreparation:
14
14
  include_paths=["/docs/*"],
15
15
  exclude_paths=["/admin/*"],
16
16
  max_discovery_depth=2,
17
- ignore_sitemap=True,
17
+ sitemap="skip",
18
18
  ignore_query_parameters=True,
19
19
  crawl_entire_domain=True,
20
20
  allow_external_links=False,
@@ -26,7 +26,7 @@ class TestAsyncCrawlRequestPreparation:
26
26
  assert payload["includePaths"] == ["/docs/*"]
27
27
  assert payload["excludePaths"] == ["/admin/*"]
28
28
  assert payload["maxDiscoveryDepth"] == 2
29
- assert payload["ignoreSitemap"] is True
29
+ assert payload["sitemap"] == "skip"
30
30
  assert payload["ignoreQueryParameters"] is True
31
31
  assert payload["crawlEntireDomain"] is True
32
32
  assert payload["allowExternalLinks"] is False
@@ -24,7 +24,7 @@ class TestCrawlRequestPreparation:
24
24
  url="https://example.com",
25
25
  limit=10,
26
26
  max_discovery_depth=3,
27
- ignore_sitemap=True,
27
+ sitemap="skip",
28
28
  crawl_entire_domain=False,
29
29
  allow_external_links=True
30
30
  )
@@ -39,8 +39,8 @@ class TestCrawlRequestPreparation:
39
39
  assert data["limit"] == 10
40
40
  assert "maxDiscoveryDepth" in data
41
41
  assert data["maxDiscoveryDepth"] == 3
42
- assert "ignoreSitemap" in data
43
- assert data["ignoreSitemap"] is True
42
+ assert "sitemap" in data
43
+ assert data["sitemap"] == "skip"
44
44
  assert "crawlEntireDomain" in data
45
45
  assert data["crawlEntireDomain"] is False
46
46
  assert "allowExternalLinks" in data
@@ -106,7 +106,7 @@ class TestCrawlRequestPreparation:
106
106
  include_paths=["/blog/*", "/docs/*"],
107
107
  exclude_paths=["/admin/*"],
108
108
  max_discovery_depth=3,
109
- ignore_sitemap=False,
109
+ sitemap="include",
110
110
  limit=100,
111
111
  crawl_entire_domain=True,
112
112
  allow_external_links=False,
@@ -126,8 +126,8 @@ class TestCrawlRequestPreparation:
126
126
  assert data["excludePaths"] == ["/admin/*"]
127
127
  assert "maxDiscoveryDepth" in data
128
128
  assert data["maxDiscoveryDepth"] == 3
129
- assert "ignoreSitemap" in data
130
- assert data["ignoreSitemap"] is False
129
+ assert "sitemap" in data
130
+ assert data["sitemap"] == "include"
131
131
  assert "limit" in data
132
132
  assert data["limit"] == 100
133
133
  assert "crawlEntireDomain" in data
@@ -121,6 +121,17 @@ def _validate_search_request(request: SearchRequest) -> SearchRequest:
121
121
  if source.type not in valid_sources:
122
122
  raise ValueError(f"Invalid source type: {source.type}. Valid types: {valid_sources}")
123
123
 
124
+ # Validate categories (if provided)
125
+ if request.categories is not None:
126
+ valid_categories = {"github", "research"}
127
+ for category in request.categories:
128
+ if isinstance(category, str):
129
+ if category not in valid_categories:
130
+ raise ValueError(f"Invalid category type: {category}. Valid types: {valid_categories}")
131
+ elif hasattr(category, 'type'):
132
+ if category.type not in valid_categories:
133
+ raise ValueError(f"Invalid category type: {category.type}. Valid types: {valid_categories}")
134
+
124
135
  # Validate location (if provided)
125
136
  if request.location is not None:
126
137
  if not isinstance(request.location, str) or len(request.location.strip()) == 0:
firecrawl/v2/types.py CHANGED
@@ -174,6 +174,12 @@ class Source(BaseModel):
174
174
 
175
175
  SourceOption = Union[str, Source]
176
176
 
177
+ class Category(BaseModel):
178
+ """Configuration for a search category."""
179
+ type: str
180
+
181
+ CategoryOption = Union[str, Category]
182
+
177
183
  FormatString = Literal[
178
184
  # camelCase versions (API format)
179
185
  "markdown", "html", "rawHtml", "links", "screenshot", "summary", "changeTracking", "json",
@@ -331,7 +337,8 @@ class SearchResultWeb(BaseModel):
331
337
  """A web search result with URL, title, and description."""
332
338
  url: str
333
339
  title: Optional[str] = None
334
- description: Optional[str] = None
340
+ description: Optional[str] = None
341
+ category: Optional[str] = None
335
342
 
336
343
  class SearchResultNews(BaseModel):
337
344
  """A news search result with URL, title, snippet, date, image URL, and position."""
@@ -341,6 +348,7 @@ class SearchResultNews(BaseModel):
341
348
  date: Optional[str] = None
342
349
  image_url: Optional[str] = None
343
350
  position: Optional[int] = None
351
+ category: Optional[str] = None
344
352
 
345
353
  class SearchResultImages(BaseModel):
346
354
  """An image search result with URL, title, image URL, image width, image height, and position."""
@@ -521,6 +529,7 @@ class SearchRequest(BaseModel):
521
529
  """Request for search operations."""
522
530
  query: str
523
531
  sources: Optional[List[SourceOption]] = None
532
+ categories: Optional[List[CategoryOption]] = None
524
533
  limit: Optional[int] = 5
525
534
  tbs: Optional[str] = None
526
535
  location: Optional[str] = None
@@ -547,6 +556,26 @@ class SearchRequest(BaseModel):
547
556
  raise ValueError(f"Invalid source format: {source}")
548
557
 
549
558
  return normalized_sources
559
+
560
+ @field_validator('categories')
561
+ @classmethod
562
+ def validate_categories(cls, v):
563
+ """Validate and normalize categories input."""
564
+ if v is None:
565
+ return v
566
+
567
+ normalized_categories = []
568
+ for category in v:
569
+ if isinstance(category, str):
570
+ normalized_categories.append(Category(type=category))
571
+ elif isinstance(category, dict):
572
+ normalized_categories.append(Category(**category))
573
+ elif isinstance(category, Category):
574
+ normalized_categories.append(category)
575
+ else:
576
+ raise ValueError(f"Invalid category format: {category}")
577
+
578
+ return normalized_categories
550
579
 
551
580
  class LinkResult(BaseModel):
552
581
  """A generic link result with optional metadata (used by search and map)."""
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/firecrawl/firecrawl
6
6
  Author: Mendable.ai
@@ -38,12 +38,8 @@ Requires-Dist: httpx
38
38
  Requires-Dist: python-dotenv
39
39
  Requires-Dist: websockets
40
40
  Requires-Dist: nest-asyncio
41
- Requires-Dist: pydantic
41
+ Requires-Dist: pydantic (>=2.0)
42
42
  Requires-Dist: aiohttp
43
- Dynamic: author
44
- Dynamic: home-page
45
- Dynamic: license-file
46
- Dynamic: requires-python
47
43
 
48
44
  # Firecrawl Python SDK
49
45
 
@@ -1,4 +1,4 @@
1
- firecrawl/__init__.py,sha256=ork6Ayurc2D3XvO14vevIf--UPrptN81Ldxytzxa8ho,2192
1
+ firecrawl/__init__.py,sha256=B7SJY_289oeNIVIPxklkrqFQQURmszzwEqFjYXDfd20,2192
2
2
  firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
3
3
  firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
4
4
  firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
@@ -22,7 +22,7 @@ firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpM
22
22
  firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
23
23
  firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
24
24
  firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
25
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=9DbLkBg6tuMyg7ASGX_oaQmAy0VCV4oITCOrfeR2UkY,8806
25
+ firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
26
26
  firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
27
27
  firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
28
28
  firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
@@ -31,7 +31,7 @@ firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCp
31
31
  firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
32
32
  firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
33
33
  firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
34
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=vFbitMhH92JS5AtU78KQPi6kbT2fv68i9-rBrY5hVss,2574
34
+ firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=RkIKt7uxBzVhAkrLQwXYjmC-9sj32SUNQrJZgF2WEMs,2565
35
35
  firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
36
36
  firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=PdUJrR0JLWqrithAnRXwuRrnsIN2h_DTu6-xvTOn_UU,725
37
37
  firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
@@ -45,7 +45,7 @@ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
45
45
  firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
46
46
  firecrawl/v2/client.py,sha256=P6WAzwYGLLIANTrqAM-K4EUdGWQoFsi-zCjBibbxKQw,30507
47
47
  firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
48
- firecrawl/v2/types.py,sha256=bbHXPWJp6Kvjx9rKkTPyWZwdqVTErS4VYZKfHsb7ZQc,21137
48
+ firecrawl/v2/types.py,sha256=rBdTaTQmb1SmdR8O0GvA_gCfBG-QCtBOrMsFgA9Usms,22114
49
49
  firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
50
50
  firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
51
51
  firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
@@ -53,7 +53,7 @@ firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8
53
53
  firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
54
54
  firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
55
55
  firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
56
- firecrawl/v2/methods/search.py,sha256=c6tkDQGYZeLsPABPVfzhjalsasnhlien3w80aoe89t0,7077
56
+ firecrawl/v2/methods/search.py,sha256=6BKiQ1aKJjWBKm9BBtKxFKGD74kCKBeMIp_OgjcDFAw,7673
57
57
  firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
58
58
  firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
59
59
  firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
@@ -70,10 +70,10 @@ firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg
70
70
  firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
71
71
  firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
72
72
  firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
73
- firecrawl-3.2.0.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
74
73
  tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
75
74
  tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
76
- firecrawl-3.2.0.dist-info/METADATA,sha256=KNaWrAOA_ZRYnzqY-asiVcXstr7n_qeilUO8R2Gz1Os,7387
77
- firecrawl-3.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
- firecrawl-3.2.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
79
- firecrawl-3.2.0.dist-info/RECORD,,
75
+ firecrawl-3.3.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
76
+ firecrawl-3.3.0.dist-info/METADATA,sha256=Yk4-i0M6LdZdXoOQgdXyaclEX695WhU0JjhfdPePg9w,7313
77
+ firecrawl-3.3.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
78
+ firecrawl-3.3.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
79
+ firecrawl-3.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: bdist_wheel (0.38.4)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5