firecrawl-py 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/__tests__/e2e/v2/test_scrape.py +37 -1
  3. firecrawl/client.py +8 -4
  4. firecrawl/v2/types.py +19 -2
  5. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/METADATA +7 -3
  6. firecrawl_py-3.3.3.dist-info/RECORD +79 -0
  7. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/WHEEL +1 -1
  8. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info/licenses}/LICENSE +0 -0
  9. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/top_level.txt +0 -2
  10. build/lib/firecrawl/__init__.py +0 -87
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  15. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  16. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  17. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  18. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  19. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  20. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  21. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  22. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  23. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  24. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  25. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  26. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  27. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  28. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  33. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  34. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  35. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  36. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  43. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  44. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  45. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  46. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  47. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  48. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  49. build/lib/firecrawl/client.py +0 -242
  50. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  51. build/lib/firecrawl/types.py +0 -161
  52. build/lib/firecrawl/v1/__init__.py +0 -14
  53. build/lib/firecrawl/v1/client.py +0 -4653
  54. build/lib/firecrawl/v2/__init__.py +0 -4
  55. build/lib/firecrawl/v2/client.py +0 -805
  56. build/lib/firecrawl/v2/client_async.py +0 -250
  57. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  58. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  59. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  60. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  61. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  62. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  63. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  64. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  65. build/lib/firecrawl/v2/methods/batch.py +0 -417
  66. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  67. build/lib/firecrawl/v2/methods/extract.py +0 -131
  68. build/lib/firecrawl/v2/methods/map.py +0 -77
  69. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  70. build/lib/firecrawl/v2/methods/search.py +0 -197
  71. build/lib/firecrawl/v2/methods/usage.py +0 -41
  72. build/lib/firecrawl/v2/types.py +0 -665
  73. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  74. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  75. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  76. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  77. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  78. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  79. build/lib/firecrawl/v2/utils/validation.py +0 -324
  80. build/lib/firecrawl/v2/watcher.py +0 -301
  81. build/lib/firecrawl/v2/watcher_async.py +0 -242
  82. build/lib/tests/test_change_tracking.py +0 -98
  83. build/lib/tests/test_timeout_conversion.py +0 -117
  84. firecrawl_py-3.3.1.dist-info/RECORD +0 -153
--- a/build/lib/firecrawl/v2/methods/aio/map.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from typing import Optional, Dict, Any
-from ...types import MapOptions, MapData, LinkResult
-from ...utils.http_client_async import AsyncHttpClient
-from ...utils.error_handler import handle_response_error
-
-
-def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict[str, Any]:
-    if not url or not url.strip():
-        raise ValueError("URL cannot be empty")
-    payload: Dict[str, Any] = {"url": url.strip()}
-    if options is not None:
-        data: Dict[str, Any] = {}
-        if getattr(options, "sitemap", None) is not None:
-            data["sitemap"] = options.sitemap
-        if options.search is not None:
-            data["search"] = options.search
-        if options.include_subdomains is not None:
-            data["includeSubdomains"] = options.include_subdomains
-        if options.limit is not None:
-            data["limit"] = options.limit
-        if options.timeout is not None:
-            data["timeout"] = options.timeout
-        payload.update(data)
-    return payload
-
-
-async def map(client: AsyncHttpClient, url: str, options: Optional[MapOptions] = None) -> MapData:
-    request_data = _prepare_map_request(url, options)
-    response = await client.post("/v2/map", request_data)
-    if response.status_code >= 400:
-        handle_response_error(response, "map")
-    body = response.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error occurred"))
-
-
-    # data = body.get("data", {})
-    # result_links: list[LinkResult] = []
-    # for item in data.get("links", []):
-    #     if isinstance(item, dict):
-    #         result_links.append(
-    #             LinkResult(
-    #                 url=item.get("url", ""),
-    #                 title=item.get("title"),
-    #                 description=item.get("description"),
-    #             )
-    #         )
-    #     elif isinstance(item, str):
-    #         result_links.append(LinkResult(url=item))
-
-    result_links: list[LinkResult] = []
-    for item in body.get("links", []):
-        if isinstance(item, dict):
-            result_links.append(LinkResult(url=item.get("url", ""), title=item.get("title"), description=item.get("description")))
-        elif isinstance(item, str):
-            result_links.append(LinkResult(url=item))
-
-    return MapData(links=result_links)
-
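The file removed above is a stale build/lib copy of firecrawl/v2/methods/aio/map.py, which this diff leaves untouched. As a rough illustration of how that async helper is shaped, here is a minimal sketch; the MapOptions field names are taken from the removed code above, while the AsyncHttpClient constructor arguments are an assumption, not part of this diff.

```python
# Sketch only: exercises the map(client, url, options) signature shown in the
# removed build/lib copy above. The AsyncHttpClient constructor arguments are
# hypothetical; check firecrawl/v2/utils/http_client_async.py for the real ones.
import asyncio

from firecrawl.v2.types import MapOptions
from firecrawl.v2.utils.http_client_async import AsyncHttpClient
from firecrawl.v2.methods.aio.map import map as map_url

async def main() -> None:
    # Hypothetical constructor arguments (not taken from this diff).
    client = AsyncHttpClient(api_key="fc-YOUR-KEY", api_url="https://api.firecrawl.dev")
    options = MapOptions(search="docs", include_subdomains=False, limit=25)
    result = await map_url(client, "https://example.com", options)
    for link in result.links:
        print(link.url, link.title)

asyncio.run(main())
```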
--- a/build/lib/firecrawl/v2/methods/aio/scrape.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from typing import Optional, Dict, Any
-from ...types import ScrapeOptions, Document
-from ...utils.normalize import normalize_document_input
-from ...utils.error_handler import handle_response_error
-from ...utils.validation import prepare_scrape_options, validate_scrape_options
-from ...utils.http_client_async import AsyncHttpClient
-
-
-async def _prepare_scrape_request(url: str, options: Optional[ScrapeOptions] = None) -> Dict[str, Any]:
-    if not url or not url.strip():
-        raise ValueError("URL cannot be empty")
-    payload: Dict[str, Any] = {"url": url.strip()}
-    if options is not None:
-        validated = validate_scrape_options(options)
-        if validated is not None:
-            opts = prepare_scrape_options(validated)
-            if opts:
-                payload.update(opts)
-    return payload
-
-
-async def scrape(client: AsyncHttpClient, url: str, options: Optional[ScrapeOptions] = None) -> Document:
-    payload = await _prepare_scrape_request(url, options)
-    response = await client.post("/v2/scrape", payload)
-    if response.status_code >= 400:
-        handle_response_error(response, "scrape")
-    body = response.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error occurred"))
-    document_data = body.get("data", {})
-    normalized = normalize_document_input(document_data)
-    return Document(**normalized)
-
--- a/build/lib/firecrawl/v2/methods/aio/search.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import re
-from typing import Dict, Any, Union, List, TypeVar, Type
-from ...types import (
-    SearchRequest,
-    SearchData,
-    Document,
-    SearchResultWeb,
-    SearchResultNews,
-    SearchResultImages,
-)
-from ...utils.http_client_async import AsyncHttpClient
-from ...utils.error_handler import handle_response_error
-from ...utils.validation import validate_scrape_options, prepare_scrape_options
-
-T = TypeVar("T")
-
-async def search(
-    client: AsyncHttpClient,
-    request: SearchRequest
-) -> SearchData:
-    """
-    Async search for documents.
-
-    Args:
-        client: Async HTTP client instance
-        request: Search request
-
-    Returns:
-        SearchData with search results grouped by source type
-
-    Raises:
-        FirecrawlError: If the search operation fails
-    """
-    request_data = _prepare_search_request(request)
-    try:
-        response = await client.post("/v2/search", request_data)
-        if response.status_code != 200:
-            handle_response_error(response, "search")
-        response_data = response.json()
-        if not response_data.get("success"):
-            handle_response_error(response, "search")
-        data = response_data.get("data", {}) or {}
-        out = SearchData()
-        if "web" in data:
-            out.web = _transform_array(data["web"], SearchResultWeb)
-        if "news" in data:
-            out.news = _transform_array(data["news"], SearchResultNews)
-        if "images" in data:
-            out.images = _transform_array(data["images"], SearchResultImages)
-        return out
-    except Exception as err:
-        if hasattr(err, "response"):
-            handle_response_error(getattr(err, "response"), "search")
-        raise err
-
-def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, Document]]:
-    """
-    Transforms an array of items into a list of result_type or Document.
-    If the item dict contains any of the special keys, it is treated as a Document.
-    Otherwise, it is treated as result_type.
-    If the item is not a dict, it is wrapped as result_type with url=item.
-    """
-    results: List[Union[T, Document]] = []
-    for item in arr:
-        if item and isinstance(item, dict):
-            if (
-                "markdown" in item or
-                "html" in item or
-                "rawHtml" in item or
-                "links" in item or
-                "screenshot" in item or
-                "changeTracking" in item or
-                "summary" in item or
-                "json" in item
-            ):
-                results.append(Document(**item))
-            else:
-                results.append(result_type(**item))
-        else:
-            results.append(result_type(url=item))
-    return results
-
-def _validate_search_request(request: SearchRequest) -> SearchRequest:
-    """
-    Validate and normalize search request.
-
-    Args:
-        request: Search request to validate
-
-    Returns:
-        Validated request
-
-    Raises:
-        ValueError: If request is invalid
-    """
-    if not request.query or not request.query.strip():
-        raise ValueError("Query cannot be empty")
-
-    if request.limit is not None:
-        if request.limit <= 0:
-            raise ValueError("Limit must be positive")
-        if request.limit > 100:
-            raise ValueError("Limit cannot exceed 100")
-
-    if request.timeout is not None:
-        if request.timeout <= 0:
-            raise ValueError("Timeout must be positive")
-        if request.timeout > 300000:
-            raise ValueError("Timeout cannot exceed 300000ms (5 minutes)")
-
-    if request.sources is not None:
-        valid_sources = {"web", "news", "images"}
-        for source in request.sources:
-            if isinstance(source, str):
-                if source not in valid_sources:
-                    raise ValueError(f"Invalid source type: {source}. Valid types: {valid_sources}")
-            elif hasattr(source, 'type'):
-                if source.type not in valid_sources:
-                    raise ValueError(f"Invalid source type: {source.type}. Valid types: {valid_sources}")
-
-    if request.location is not None:
-        if not isinstance(request.location, str) or len(request.location.strip()) == 0:
-            raise ValueError("Location must be a non-empty string")
-
-    if request.tbs is not None:
-        valid_tbs_values = {
-            "qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y",
-            "d", "w", "m", "y"
-        }
-        if request.tbs in valid_tbs_values:
-            pass
-        elif request.tbs.startswith("cdr:"):
-            custom_date_pattern = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
-            if not re.match(custom_date_pattern, request.tbs):
-                raise ValueError(f"Invalid custom date range format: {request.tbs}. Expected format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
-        else:
-            raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values} or custom date range format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
-
-    if request.scrape_options is not None:
-        validate_scrape_options(request.scrape_options)
-
-    return request
-
-def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
-    """
-    Prepare a search request payload.
-
-    Args:
-        request: Search request
-
-    Returns:
-        Request payload dictionary
-    """
-    validated_request = _validate_search_request(request)
-    data = validated_request.model_dump(exclude_none=True, by_alias=True)
-
-    if "limit" not in data and validated_request.limit is not None:
-        data["limit"] = validated_request.limit
-    if "timeout" not in data and validated_request.timeout is not None:
-        data["timeout"] = validated_request.timeout
-
-    if validated_request.ignore_invalid_urls is not None:
-        data["ignoreInvalidURLs"] = validated_request.ignore_invalid_urls
-        data.pop("ignore_invalid_urls", None)
-
-    if validated_request.scrape_options is not None:
-        scrape_data = prepare_scrape_options(validated_request.scrape_options)
-        if scrape_data:
-            data["scrapeOptions"] = scrape_data
-        data.pop("scrape_options", None)
-
-    return data
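The validator in the removed search module above encodes the accepted values for the tbs (time-based search) parameter: a small fixed set, or a custom cdr: date range. The standalone sketch below mirrors that rule so the accepted formats are easy to see; it is an illustration derived from the code above, not the SDK's public API.

```python
# Standalone sketch of the tbs rule enforced by _validate_search_request above:
# either a fixed value, or a custom range of the form
# cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY.
import re

VALID_TBS = {"qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y", "d", "w", "m", "y"}
CUSTOM_RANGE = re.compile(r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$")

def tbs_is_valid(tbs: str) -> bool:
    if tbs in VALID_TBS:
        return True
    return tbs.startswith("cdr:") and CUSTOM_RANGE.match(tbs) is not None

assert tbs_is_valid("qdr:w")                                    # past week
assert tbs_is_valid("cdr:1,cd_min:1/1/2024,cd_max:12/31/2024")  # custom date range
assert not tbs_is_valid("qdr:x")                                # rejected
```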
--- a/build/lib/firecrawl/v2/methods/aio/usage.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from ...utils.http_client_async import AsyncHttpClient
-from ...utils.error_handler import handle_response_error
-from ...types import ConcurrencyCheck, CreditUsage, TokenUsage
-
-
-async def get_concurrency(client: AsyncHttpClient) -> ConcurrencyCheck:
-    resp = await client.get("/v2/concurrency-check")
-    if resp.status_code >= 400:
-        handle_response_error(resp, "get concurrency")
-    body = resp.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error"))
-    data = body.get("data", body)
-    return ConcurrencyCheck(
-        concurrency=data.get("concurrency"),
-        max_concurrency=data.get("maxConcurrency", data.get("max_concurrency")),
-    )
-
-
-async def get_credit_usage(client: AsyncHttpClient) -> CreditUsage:
-    resp = await client.get("/v2/team/credit-usage")
-    if resp.status_code >= 400:
-        handle_response_error(resp, "get credit usage")
-    body = resp.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error"))
-    data = body.get("data", body)
-    return CreditUsage(remaining_credits=data.get("remainingCredits", data.get("remaining_credits", 0)))
-
-
-async def get_token_usage(client: AsyncHttpClient) -> TokenUsage:
-    resp = await client.get("/v2/team/token-usage")
-    if resp.status_code >= 400:
-        handle_response_error(resp, "get token usage")
-    body = resp.json()
-    if not body.get("success"):
-        raise Exception(body.get("error", "Unknown error"))
-    data = body.get("data", body)
-    return TokenUsage(
-        remaining_tokens=data.get("remainingTokens", 0)
-    )
-
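As with the other hunks, this removes only the build/lib duplicate; the live helpers remain under firecrawl/v2/methods/aio/usage.py. A hedged sketch of driving them follows, with field names taken from the removed code above; the AsyncHttpClient construction is again an assumption.

```python
# Sketch only: calls get_concurrency / get_credit_usage / get_token_usage as
# defined in the removed build/lib copy above. Client construction is assumed.
import asyncio

from firecrawl.v2.utils.http_client_async import AsyncHttpClient
from firecrawl.v2.methods.aio.usage import get_concurrency, get_credit_usage, get_token_usage

async def report_usage() -> None:
    # Hypothetical constructor arguments (not taken from this diff).
    client = AsyncHttpClient(api_key="fc-YOUR-KEY", api_url="https://api.firecrawl.dev")
    concurrency = await get_concurrency(client)
    credits = await get_credit_usage(client)
    tokens = await get_token_usage(client)
    print(f"concurrency: {concurrency.concurrency}/{concurrency.max_concurrency}")
    print(f"remaining credits: {credits.remaining_credits}")
    print(f"remaining tokens: {tokens.remaining_tokens}")

asyncio.run(report_usage())
```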