firecrawl-py 3.3.0__py3-none-any.whl → 3.3.2__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl-py might be problematic.

Files changed (82)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/v2/client.py +3 -0
  3. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/METADATA +1 -1
  4. firecrawl_py-3.3.2.dist-info/RECORD +79 -0
  5. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/top_level.txt +0 -2
  6. build/lib/firecrawl/__init__.py +0 -87
  7. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  8. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  9. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  10. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  15. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  16. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  17. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  18. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  19. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  20. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  21. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  22. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  23. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  24. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  25. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  26. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  27. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  28. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  33. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  34. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  35. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  36. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  43. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  44. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  45. build/lib/firecrawl/client.py +0 -242
  46. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  47. build/lib/firecrawl/types.py +0 -161
  48. build/lib/firecrawl/v1/__init__.py +0 -14
  49. build/lib/firecrawl/v1/client.py +0 -4653
  50. build/lib/firecrawl/v2/__init__.py +0 -4
  51. build/lib/firecrawl/v2/client.py +0 -802
  52. build/lib/firecrawl/v2/client_async.py +0 -250
  53. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  54. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  55. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  56. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  57. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  58. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  59. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  60. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  61. build/lib/firecrawl/v2/methods/batch.py +0 -417
  62. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  63. build/lib/firecrawl/v2/methods/extract.py +0 -131
  64. build/lib/firecrawl/v2/methods/map.py +0 -77
  65. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  66. build/lib/firecrawl/v2/methods/search.py +0 -197
  67. build/lib/firecrawl/v2/methods/usage.py +0 -41
  68. build/lib/firecrawl/v2/types.py +0 -665
  69. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  70. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  71. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  72. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  73. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  74. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  75. build/lib/firecrawl/v2/utils/validation.py +0 -324
  76. build/lib/firecrawl/v2/watcher.py +0 -301
  77. build/lib/firecrawl/v2/watcher_async.py +0 -242
  78. build/lib/tests/test_change_tracking.py +0 -98
  79. build/lib/tests/test_timeout_conversion.py +0 -117
  80. firecrawl_py-3.3.0.dist-info/RECORD +0 -153
  81. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/LICENSE +0 -0
  82. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/WHEEL +0 -0
build/lib/firecrawl/v2/methods/aio/map.py
@@ -1,59 +0,0 @@
- from typing import Optional, Dict, Any
- from ...types import MapOptions, MapData, LinkResult
- from ...utils.http_client_async import AsyncHttpClient
- from ...utils.error_handler import handle_response_error
-
-
- def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict[str, Any]:
-     if not url or not url.strip():
-         raise ValueError("URL cannot be empty")
-     payload: Dict[str, Any] = {"url": url.strip()}
-     if options is not None:
-         data: Dict[str, Any] = {}
-         if getattr(options, "sitemap", None) is not None:
-             data["sitemap"] = options.sitemap
-         if options.search is not None:
-             data["search"] = options.search
-         if options.include_subdomains is not None:
-             data["includeSubdomains"] = options.include_subdomains
-         if options.limit is not None:
-             data["limit"] = options.limit
-         if options.timeout is not None:
-             data["timeout"] = options.timeout
-         payload.update(data)
-     return payload
-
-
- async def map(client: AsyncHttpClient, url: str, options: Optional[MapOptions] = None) -> MapData:
-     request_data = _prepare_map_request(url, options)
-     response = await client.post("/v2/map", request_data)
-     if response.status_code >= 400:
-         handle_response_error(response, "map")
-     body = response.json()
-     if not body.get("success"):
-         raise Exception(body.get("error", "Unknown error occurred"))
-
-
-     # data = body.get("data", {})
-     # result_links: list[LinkResult] = []
-     # for item in data.get("links", []):
-     #     if isinstance(item, dict):
-     #         result_links.append(
-     #             LinkResult(
-     #                 url=item.get("url", ""),
-     #                 title=item.get("title"),
-     #                 description=item.get("description"),
-     #             )
-     #         )
-     #     elif isinstance(item, str):
-     #         result_links.append(LinkResult(url=item))
-
-     result_links: list[LinkResult] = []
-     for item in body.get("links", []):
-         if isinstance(item, dict):
-             result_links.append(LinkResult(url=item.get("url", ""), title=item.get("title"), description=item.get("description")))
-         elif isinstance(item, str):
-             result_links.append(LinkResult(url=item))
-
-     return MapData(links=result_links)
-
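
For orientation, a minimal sketch of driving the removed aio map helper above. The map() signature, the MapOptions field names, and the MapData.links/LinkResult attributes come from the diff; the import paths (mirroring the non-build copy of this module), the example URL, and the option values are assumptions, and `client` is presumed to be an already-configured AsyncHttpClient.

import asyncio
from firecrawl.v2.types import MapOptions
from firecrawl.v2.methods.aio.map import map as aio_map

async def demo(client):
    # Field names mirror what _prepare_map_request serializes above; values are illustrative.
    options = MapOptions(search="docs", include_subdomains=False, limit=10, timeout=15000)
    result = await aio_map(client, "https://example.com", options)
    for link in result.links:
        print(link.url, link.title, link.description)

# asyncio.run(demo(client))  # requires a configured AsyncHttpClient (setup not shown in the diff)
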
build/lib/firecrawl/v2/methods/aio/scrape.py
@@ -1,33 +0,0 @@
- from typing import Optional, Dict, Any
- from ...types import ScrapeOptions, Document
- from ...utils.normalize import normalize_document_input
- from ...utils.error_handler import handle_response_error
- from ...utils.validation import prepare_scrape_options, validate_scrape_options
- from ...utils.http_client_async import AsyncHttpClient
-
-
- async def _prepare_scrape_request(url: str, options: Optional[ScrapeOptions] = None) -> Dict[str, Any]:
-     if not url or not url.strip():
-         raise ValueError("URL cannot be empty")
-     payload: Dict[str, Any] = {"url": url.strip()}
-     if options is not None:
-         validated = validate_scrape_options(options)
-         if validated is not None:
-             opts = prepare_scrape_options(validated)
-             if opts:
-                 payload.update(opts)
-     return payload
-
-
- async def scrape(client: AsyncHttpClient, url: str, options: Optional[ScrapeOptions] = None) -> Document:
-     payload = await _prepare_scrape_request(url, options)
-     response = await client.post("/v2/scrape", payload)
-     if response.status_code >= 400:
-         handle_response_error(response, "scrape")
-     body = response.json()
-     if not body.get("success"):
-         raise Exception(body.get("error", "Unknown error occurred"))
-     document_data = body.get("data", {})
-     normalized = normalize_document_input(document_data)
-     return Document(**normalized)
-
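
Likewise, a hedged sketch of the removed aio scrape helper. Only the scrape() signature, the /v2/scrape endpoint, and the Document return type come from the diff; the import path and URL are assumptions, and no ScrapeOptions are passed since their fields are not shown here.

from firecrawl.v2.methods.aio.scrape import scrape as aio_scrape

async def demo(client):
    # Per the diff, scrape() posts to /v2/scrape and builds a Document from the
    # normalized "data" payload; options are optional.
    doc = await aio_scrape(client, "https://example.com")
    print(getattr(doc, "markdown", None))  # "markdown" is assumed to be a Document field
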
build/lib/firecrawl/v2/methods/aio/search.py
@@ -1,172 +0,0 @@
- import re
- from typing import Dict, Any, Union, List, TypeVar, Type
- from ...types import (
-     SearchRequest,
-     SearchData,
-     Document,
-     SearchResultWeb,
-     SearchResultNews,
-     SearchResultImages,
- )
- from ...utils.http_client_async import AsyncHttpClient
- from ...utils.error_handler import handle_response_error
- from ...utils.validation import validate_scrape_options, prepare_scrape_options
-
- T = TypeVar("T")
-
- async def search(
-     client: AsyncHttpClient,
-     request: SearchRequest
- ) -> SearchData:
-     """
-     Async search for documents.
-
-     Args:
-         client: Async HTTP client instance
-         request: Search request
-
-     Returns:
-         SearchData with search results grouped by source type
-
-     Raises:
-         FirecrawlError: If the search operation fails
-     """
-     request_data = _prepare_search_request(request)
-     try:
-         response = await client.post("/v2/search", request_data)
-         if response.status_code != 200:
-             handle_response_error(response, "search")
-         response_data = response.json()
-         if not response_data.get("success"):
-             handle_response_error(response, "search")
-         data = response_data.get("data", {}) or {}
-         out = SearchData()
-         if "web" in data:
-             out.web = _transform_array(data["web"], SearchResultWeb)
-         if "news" in data:
-             out.news = _transform_array(data["news"], SearchResultNews)
-         if "images" in data:
-             out.images = _transform_array(data["images"], SearchResultImages)
-         return out
-     except Exception as err:
-         if hasattr(err, "response"):
-             handle_response_error(getattr(err, "response"), "search")
-         raise err
-
- def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, Document]]:
-     """
-     Transforms an array of items into a list of result_type or Document.
-     If the item dict contains any of the special keys, it is treated as a Document.
-     Otherwise, it is treated as result_type.
-     If the item is not a dict, it is wrapped as result_type with url=item.
-     """
-     results: List[Union[T, Document]] = []
-     for item in arr:
-         if item and isinstance(item, dict):
-             if (
-                 "markdown" in item or
-                 "html" in item or
-                 "rawHtml" in item or
-                 "links" in item or
-                 "screenshot" in item or
-                 "changeTracking" in item or
-                 "summary" in item or
-                 "json" in item
-             ):
-                 results.append(Document(**item))
-             else:
-                 results.append(result_type(**item))
-         else:
-             results.append(result_type(url=item))
-     return results
-
- def _validate_search_request(request: SearchRequest) -> SearchRequest:
-     """
-     Validate and normalize search request.
-
-     Args:
-         request: Search request to validate
-
-     Returns:
-         Validated request
-
-     Raises:
-         ValueError: If request is invalid
-     """
-     if not request.query or not request.query.strip():
-         raise ValueError("Query cannot be empty")
-
-     if request.limit is not None:
-         if request.limit <= 0:
-             raise ValueError("Limit must be positive")
-         if request.limit > 100:
-             raise ValueError("Limit cannot exceed 100")
-
-     if request.timeout is not None:
-         if request.timeout <= 0:
-             raise ValueError("Timeout must be positive")
-         if request.timeout > 300000:
-             raise ValueError("Timeout cannot exceed 300000ms (5 minutes)")
-
-     if request.sources is not None:
-         valid_sources = {"web", "news", "images"}
-         for source in request.sources:
-             if isinstance(source, str):
-                 if source not in valid_sources:
-                     raise ValueError(f"Invalid source type: {source}. Valid types: {valid_sources}")
-             elif hasattr(source, 'type'):
-                 if source.type not in valid_sources:
-                     raise ValueError(f"Invalid source type: {source.type}. Valid types: {valid_sources}")
-
-     if request.location is not None:
-         if not isinstance(request.location, str) or len(request.location.strip()) == 0:
-             raise ValueError("Location must be a non-empty string")
-
-     if request.tbs is not None:
-         valid_tbs_values = {
-             "qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y",
-             "d", "w", "m", "y"
-         }
-         if request.tbs in valid_tbs_values:
-             pass
-         elif request.tbs.startswith("cdr:"):
-             custom_date_pattern = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
-             if not re.match(custom_date_pattern, request.tbs):
-                 raise ValueError(f"Invalid custom date range format: {request.tbs}. Expected format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
-         else:
-             raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values} or custom date range format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
-
-     if request.scrape_options is not None:
-         validate_scrape_options(request.scrape_options)
-
-     return request
-
- def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
-     """
-     Prepare a search request payload.
-
-     Args:
-         request: Search request
-
-     Returns:
-         Request payload dictionary
-     """
-     validated_request = _validate_search_request(request)
-     data = validated_request.model_dump(exclude_none=True, by_alias=True)
-
-     if "limit" not in data and validated_request.limit is not None:
-         data["limit"] = validated_request.limit
-     if "timeout" not in data and validated_request.timeout is not None:
-         data["timeout"] = validated_request.timeout
-
-     if validated_request.ignore_invalid_urls is not None:
-         data["ignoreInvalidURLs"] = validated_request.ignore_invalid_urls
-         data.pop("ignore_invalid_urls", None)
-
-     if validated_request.scrape_options is not None:
-         scrape_data = prepare_scrape_options(validated_request.scrape_options)
-         if scrape_data:
-             data["scrapeOptions"] = scrape_data
-         data.pop("scrape_options", None)
-
-     return data
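
A sketch of the removed async search flow. The SearchRequest field names (query, limit, sources, tbs), the grouping of results into web/news/images, and the SearchResultWeb-or-Document item types all come from the diff; the import paths and the concrete query values are illustrative assumptions.

from firecrawl.v2.types import SearchRequest
from firecrawl.v2.methods.aio.search import search as aio_search

async def demo(client):
    # All of these fields are checked by _validate_search_request above.
    req = SearchRequest(query="web scraping api", limit=5, sources=["web", "news"], tbs="qdr:w")
    results = await aio_search(client, req)
    for item in results.web or []:
        # Items are SearchResultWeb models, or full Documents when scrape fields are present.
        print(getattr(item, "url", None))
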
build/lib/firecrawl/v2/methods/aio/usage.py
@@ -1,42 +0,0 @@
- from ...utils.http_client_async import AsyncHttpClient
- from ...utils.error_handler import handle_response_error
- from ...types import ConcurrencyCheck, CreditUsage, TokenUsage
-
-
- async def get_concurrency(client: AsyncHttpClient) -> ConcurrencyCheck:
-     resp = await client.get("/v2/concurrency-check")
-     if resp.status_code >= 400:
-         handle_response_error(resp, "get concurrency")
-     body = resp.json()
-     if not body.get("success"):
-         raise Exception(body.get("error", "Unknown error"))
-     data = body.get("data", body)
-     return ConcurrencyCheck(
-         concurrency=data.get("concurrency"),
-         max_concurrency=data.get("maxConcurrency", data.get("max_concurrency")),
-     )
-
-
- async def get_credit_usage(client: AsyncHttpClient) -> CreditUsage:
-     resp = await client.get("/v2/team/credit-usage")
-     if resp.status_code >= 400:
-         handle_response_error(resp, "get credit usage")
-     body = resp.json()
-     if not body.get("success"):
-         raise Exception(body.get("error", "Unknown error"))
-     data = body.get("data", body)
-     return CreditUsage(remaining_credits=data.get("remainingCredits", data.get("remaining_credits", 0)))
-
-
- async def get_token_usage(client: AsyncHttpClient) -> TokenUsage:
-     resp = await client.get("/v2/team/token-usage")
-     if resp.status_code >= 400:
-         handle_response_error(resp, "get token usage")
-     body = resp.json()
-     if not body.get("success"):
-         raise Exception(body.get("error", "Unknown error"))
-     data = body.get("data", body)
-     return TokenUsage(
-         remaining_tokens=data.get("remainingTokens", 0)
-     )
-
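
Finally, a sketch of the removed usage helpers. The endpoints and the returned field names are exactly those in the diff; the import path and the pre-configured `client` are assumptions.

from firecrawl.v2.methods.aio.usage import get_concurrency, get_credit_usage, get_token_usage

async def demo(client):
    # Each helper GETs its /v2 endpoint and unwraps the "data" envelope, as shown above.
    concurrency = await get_concurrency(client)
    credits = await get_credit_usage(client)
    tokens = await get_token_usage(client)
    print(concurrency.concurrency, concurrency.max_concurrency)
    print(credits.remaining_credits, tokens.remaining_tokens)
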