crawl4ai-cloud-sdk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
"""
Crawl4AI Cloud SDK: a lightweight cloud client for the Crawl4AI API.

The public names of the SDK are re-exported from this package, so they can
be imported directly from ``crawl4ai_cloud``.

Example (run it inside a coroutine, since it uses ``async with``/``await``):
    ```python
    from crawl4ai_cloud import AsyncWebCrawler, CrawlerRunConfig

    async with AsyncWebCrawler(api_key="sk_live_xxx") as crawler:
        result = await crawler.run("https://example.com")
        print(result.markdown.raw_markdown)
    ```
"""

# Version of the SDK package as a whole.
__version__ = "0.2.0"
15
+
16
+ # Main crawler class
17
+ from .crawler import AsyncWebCrawler
18
+
19
+ # Configuration classes
20
+ from .configs import (
21
+ CrawlerRunConfig,
22
+ BrowserConfig,
23
+ build_crawl_request,
24
+ sanitize_crawler_config,
25
+ sanitize_browser_config,
26
+ normalize_proxy,
27
+ normalize_url,
28
+ )
29
+
30
+ # Response models
31
+ from .models import (
32
+ CrawlResult,
33
+ CrawlJob,
34
+ JobProgress,
35
+ MarkdownResult,
36
+ DeepCrawlResult,
37
+ ScanUrlInfo,
38
+ ContextResult,
39
+ GeneratedSchema,
40
+ StorageUsage,
41
+ ProxyConfig,
42
+ LLMUsage,
43
+ # Usage metrics
44
+ Usage,
45
+ CrawlUsageMetrics,
46
+ LLMUsageMetrics,
47
+ StorageUsageMetrics,
48
+ )
49
+
50
+ # Errors
51
+ from .errors import (
52
+ CloudError,
53
+ AuthenticationError,
54
+ RateLimitError,
55
+ QuotaExceededError,
56
+ NotFoundError,
57
+ ValidationError,
58
+ TimeoutError,
59
+ ServerError,
60
+ )
61
+
62
# Public API of the package, assembled group by group. The resulting list
# holds the same names, in the same order, as a single flat literal would.

# Version and the main crawler class.
__all__ = ["__version__", "AsyncWebCrawler"]

# Configuration classes and request-building helpers.
__all__ += [
    "CrawlerRunConfig",
    "BrowserConfig",
    "build_crawl_request",
    "sanitize_crawler_config",
    "sanitize_browser_config",
    "normalize_proxy",
    "normalize_url",
]

# Response models, followed by the usage-metric models.
__all__ += [
    "CrawlResult",
    "CrawlJob",
    "JobProgress",
    "MarkdownResult",
    "DeepCrawlResult",
    "ScanUrlInfo",
    "ContextResult",
    "GeneratedSchema",
    "StorageUsage",
    "ProxyConfig",
    "LLMUsage",
    "Usage",
    "CrawlUsageMetrics",
    "LLMUsageMetrics",
    "StorageUsageMetrics",
]

# Exception types.
__all__ += [
    "CloudError",
    "AuthenticationError",
    "RateLimitError",
    "QuotaExceededError",
    "NotFoundError",
    "ValidationError",
    "TimeoutError",
    "ServerError",
]
@@ -0,0 +1,190 @@
1
+ """Internal HTTP client for Crawl4AI Cloud SDK."""
2
+ import asyncio
3
+ import os
4
+ from typing import Optional, Dict, Any
5
+
6
+ import httpx
7
+
8
+ from .errors import (
9
+ CloudError,
10
+ AuthenticationError,
11
+ RateLimitError,
12
+ QuotaExceededError,
13
+ NotFoundError,
14
+ ValidationError,
15
+ ServerError,
16
+ TimeoutError,
17
+ )
18
+
19
# SDK version reported in the User-Agent header of every request.
# Keep this in sync with the package-level ``__version__`` (0.2.0); the
# previous value "0.1.0" made the client advertise a stale SDK version.
__version__ = "0.2.0"

# Base URL used when the caller does not supply one.
DEFAULT_BASE_URL = "https://api.crawl4ai.com"
# Default per-request timeout, in seconds.
DEFAULT_TIMEOUT = 120.0
# Default number of attempts made for a request that keeps failing transiently.
DEFAULT_MAX_RETRIES = 3
24
+
25
+
26
class HTTPClient:
    """Internal async HTTP client with retries and error mapping.

    Wraps a lazily created ``httpx.AsyncClient`` that sends the API key in
    the ``X-API-Key`` header. Transport failures and 5xx responses other
    than 504 are retried with exponential backoff (1s, 2s, 4s, ...); every
    other error response is raised immediately as one of the exception
    types imported from ``.errors``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> None:
        """
        Initialize the HTTP client.

        Args:
            api_key: Your Crawl4AI API key (sk_live_* or sk_test_*).
                     If not provided, reads from CRAWL4AI_API_KEY env var.
            base_url: API base URL (default: https://api.crawl4ai.com).
                     A trailing slash is stripped.
            timeout: Request timeout in seconds (default: 120)
            max_retries: Maximum number of attempts made per request when
                     transient errors occur (default: 3). Values below 1
                     are treated as 1, so a request is always sent.

        Raises:
            ValueError: If API key is missing or has invalid format
        """
        self._api_key = api_key or os.getenv("CRAWL4AI_API_KEY")

        if not self._api_key:
            raise ValueError(
                "API key is required. Provide it as an argument or set "
                "the CRAWL4AI_API_KEY environment variable."
            )

        if not self._api_key.startswith(("sk_live_", "sk_test_")):
            raise ValueError(
                "Invalid API key format. Expected sk_live_* or sk_test_*"
            )

        self._base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._max_retries = max_retries
        # Created on first use by _get_client(); reset to None by close().
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the underlying client, creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self._base_url,
                headers={
                    "X-API-Key": self._api_key,
                    "Content-Type": "application/json",
                    "User-Agent": f"crawl4ai-cloud/{__version__}",
                },
                timeout=httpx.Timeout(self._timeout),
            )
        return self._client

    @staticmethod
    def _error_from_response(response: httpx.Response) -> Exception:
        """Build the SDK exception that corresponds to an error response.

        The exception is returned, not raised, so the caller decides when
        to raise it.
        """
        status = response.status_code

        try:
            payload = response.json()
        except ValueError:
            # Body is not valid JSON (JSONDecodeError is a ValueError).
            payload = None

        if isinstance(payload, dict):
            error_data = payload
            detail = payload.get("detail", str(payload))
        else:
            # Non-JSON body, or JSON that is not an object: use the raw text.
            error_data = {}
            detail = response.text or f"HTTP {status}"

        headers = {k.lower(): v for k, v in response.headers.items()}

        if status == 429:
            # ``detail`` may be a list or dict rather than a string, so it is
            # stringified before the case-insensitive substring check.
            if "rate limit" in str(detail).lower():
                error_cls = RateLimitError
            else:
                error_cls = QuotaExceededError
        elif status == 401:
            error_cls = AuthenticationError
        elif status == 404:
            error_cls = NotFoundError
        elif status == 400:
            error_cls = ValidationError
        elif status == 504:
            error_cls = TimeoutError
        elif status >= 500:
            error_cls = ServerError
        else:
            error_cls = CloudError

        return error_cls(detail, status, error_data, headers)

    async def request(
        self,
        method: str,
        path: str,
        params: Optional[Dict[str, Any]] = None,
        json: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
    ) -> Dict[str, Any]:
        """
        Make HTTP request with error handling and retries.

        Args:
            method: HTTP method (GET, POST, DELETE, etc.)
            path: API endpoint path
            params: Query parameters
            json: JSON body
            timeout: Request timeout override; a falsy value (None or 0)
                falls back to the client's configured timeout

        Returns:
            Parsed JSON response, or an empty dict when the response has
            no body

        Raises:
            AuthenticationError: 401 - Invalid API key
            NotFoundError: 404 - Resource not found
            RateLimitError: 429 - Detail mentions "rate limit"
            QuotaExceededError: 429 - Any other 429 response
            ValidationError: 400 - Invalid request
            TimeoutError: 504, or client timeout on the last attempt
            ServerError: Other 5xx responses, once attempts are exhausted
            CloudError: Other error statuses, or a transport failure on
                the last attempt
        """
        client = await self._get_client()
        # Always send at least one request, even when max_retries is 0 or
        # negative; otherwise the loop below would never contact the server.
        attempts = max(1, self._max_retries)

        for attempt in range(attempts):
            final_attempt = attempt == attempts - 1
            try:
                response = await client.request(
                    method,
                    path,
                    params=params,
                    json=json,
                    timeout=timeout or self._timeout,
                )
            except httpx.TimeoutException as exc:
                if final_attempt:
                    raise TimeoutError(f"Request timed out: {exc}") from exc
            except httpx.RequestError as exc:
                if final_attempt:
                    raise CloudError(f"Request failed: {exc}") from exc
            else:
                status = response.status_code
                if status < 400:
                    return response.json() if response.content else {}

                # Only 5xx responses other than 504 are worth retrying.
                retryable = status >= 500 and status != 504
                if final_attempt or not retryable:
                    raise self._error_from_response(response)

            # Transient failure with attempts left: exponential backoff.
            await asyncio.sleep(2 ** attempt)

        # Not reachable: the final attempt always returns or raises. Kept
        # as a defensive fallback so the function never returns None.
        raise CloudError("Max retries exceeded")

    async def close(self) -> None:
        """Close the underlying HTTP client, if one is open, and drop it."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            self._client = None

    async def __aenter__(self) -> "HTTPClient":
        """Enter the async context; returns this client unchanged."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the async context by closing the client."""
        await self.close()