anysite-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anysite-cli might be problematic. Click here for more details.

Files changed (64)
  1. anysite/__init__.py +4 -0
  2. anysite/__main__.py +6 -0
  3. anysite/api/__init__.py +21 -0
  4. anysite/api/client.py +271 -0
  5. anysite/api/errors.py +137 -0
  6. anysite/api/schemas.py +333 -0
  7. anysite/batch/__init__.py +1 -0
  8. anysite/batch/executor.py +176 -0
  9. anysite/batch/input.py +160 -0
  10. anysite/batch/rate_limiter.py +98 -0
  11. anysite/cli/__init__.py +1 -0
  12. anysite/cli/config.py +176 -0
  13. anysite/cli/executor.py +388 -0
  14. anysite/cli/options.py +249 -0
  15. anysite/config/__init__.py +11 -0
  16. anysite/config/paths.py +46 -0
  17. anysite/config/settings.py +187 -0
  18. anysite/dataset/__init__.py +37 -0
  19. anysite/dataset/analyzer.py +268 -0
  20. anysite/dataset/cli.py +644 -0
  21. anysite/dataset/collector.py +686 -0
  22. anysite/dataset/db_loader.py +248 -0
  23. anysite/dataset/errors.py +30 -0
  24. anysite/dataset/exporters.py +121 -0
  25. anysite/dataset/history.py +153 -0
  26. anysite/dataset/models.py +245 -0
  27. anysite/dataset/notifications.py +87 -0
  28. anysite/dataset/scheduler.py +107 -0
  29. anysite/dataset/storage.py +171 -0
  30. anysite/dataset/transformer.py +213 -0
  31. anysite/db/__init__.py +38 -0
  32. anysite/db/adapters/__init__.py +1 -0
  33. anysite/db/adapters/base.py +158 -0
  34. anysite/db/adapters/postgres.py +201 -0
  35. anysite/db/adapters/sqlite.py +183 -0
  36. anysite/db/cli.py +687 -0
  37. anysite/db/config.py +92 -0
  38. anysite/db/manager.py +166 -0
  39. anysite/db/operations/__init__.py +1 -0
  40. anysite/db/operations/insert.py +199 -0
  41. anysite/db/operations/query.py +43 -0
  42. anysite/db/schema/__init__.py +1 -0
  43. anysite/db/schema/inference.py +213 -0
  44. anysite/db/schema/types.py +71 -0
  45. anysite/db/utils/__init__.py +1 -0
  46. anysite/db/utils/sanitize.py +99 -0
  47. anysite/main.py +498 -0
  48. anysite/models/__init__.py +1 -0
  49. anysite/output/__init__.py +11 -0
  50. anysite/output/console.py +45 -0
  51. anysite/output/formatters.py +301 -0
  52. anysite/output/templates.py +76 -0
  53. anysite/py.typed +0 -0
  54. anysite/streaming/__init__.py +1 -0
  55. anysite/streaming/progress.py +121 -0
  56. anysite/streaming/writer.py +130 -0
  57. anysite/utils/__init__.py +1 -0
  58. anysite/utils/fields.py +242 -0
  59. anysite/utils/retry.py +109 -0
  60. anysite_cli-0.1.0.dist-info/METADATA +437 -0
  61. anysite_cli-0.1.0.dist-info/RECORD +64 -0
  62. anysite_cli-0.1.0.dist-info/WHEEL +4 -0
  63. anysite_cli-0.1.0.dist-info/entry_points.txt +2 -0
  64. anysite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
anysite/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Anysite CLI - Web data extraction for humans and AI agents."""
2
+
3
# Package version string; the API client embeds it in its User-Agent header.
__version__ = "0.1.0"
# Application name constant.
__app_name__ = "anysite"
anysite/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for running anysite as a module: python -m anysite."""
2
+
3
+ from anysite.main import app
4
+
5
# Invoke the ``app`` callable imported from ``anysite.main`` only when this
# module is executed directly (``python -m anysite``), not when it is imported.
if __name__ == "__main__":
    app()
anysite/api/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """API client module."""
2
+
3
+ from anysite.api.client import AnysiteClient
4
+ from anysite.api.errors import (
5
+ AnysiteError,
6
+ AuthenticationError,
7
+ NotFoundError,
8
+ RateLimitError,
9
+ ServerError,
10
+ ValidationError,
11
+ )
12
+
13
# Names re-exported as the public API of the ``anysite.api`` package.
# NOTE(review): ``anysite/api/client.py`` also imports ``NetworkError`` and
# ``TimeoutError`` from ``anysite.api.errors``, but neither is re-exported
# here — confirm whether that omission is intentional.
__all__ = [
    "AnysiteClient",
    "AnysiteError",
    "AuthenticationError",
    "NotFoundError",
    "RateLimitError",
    "ServerError",
    "ValidationError",
]
anysite/api/client.py ADDED
@@ -0,0 +1,271 @@
1
+ """Anysite API client with authentication and retry logic."""
2
+
3
+ import asyncio
4
+ from typing import Any
5
+
6
+ import httpx
7
+
8
+ from anysite import __version__
9
+ from anysite.api.errors import (
10
+ AnysiteError,
11
+ AuthenticationError,
12
+ NetworkError,
13
+ NotFoundError,
14
+ RateLimitError,
15
+ ServerError,
16
+ TimeoutError,
17
+ ValidationError,
18
+ )
19
+
20
+
21
class AnysiteClient:
    """Asynchronous HTTP client for the Anysite API.

    Features:
    - Authentication via the ``access-token`` header
    - Automatic retry with backoff for rate limits (429), server errors
      (5xx), timeouts and network failures
    - Translation of error responses into ``AnysiteError`` subclasses

    The underlying ``httpx.AsyncClient`` is created lazily on first request
    and released by :meth:`close` (or by leaving an ``async with`` block).
    """

    DEFAULT_BASE_URL = "https://api.anysite.io"
    DEFAULT_TIMEOUT = 300  # 5 minutes
    MAX_RETRIES = 3
    RETRY_DELAYS = [1, 2, 4]  # Exponential backoff

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        timeout: int | None = None,
    ) -> None:
        """Initialize the API client.

        Args:
            api_key: Anysite API key
            base_url: API base URL (default: https://api.anysite.io)
            timeout: Request timeout in seconds (default: 300)
        """
        self.api_key = api_key
        # Trailing slashes are stripped from the configured base URL.
        self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
        self.timeout = timeout or self.DEFAULT_TIMEOUT

        # Created lazily by _get_client().
        self._client: httpx.AsyncClient | None = None

    def _get_headers(self) -> dict[str, str]:
        """Get request headers, including authentication when a key is set."""
        headers = {
            "User-Agent": f"anysite-cli/{__version__}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        if self.api_key:
            headers["access-token"] = self.api_key
        return headers

    async def _get_client(self) -> httpx.AsyncClient:
        """Get the HTTP client, creating a new one if absent or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                headers=self._get_headers(),
                timeout=httpx.Timeout(self.timeout),
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client if one is open."""
        if self._client is not None and not self._client.is_closed:
            await self._client.aclose()
            self._client = None

    async def __aenter__(self) -> "AnysiteClient":
        """Async context manager entry."""
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Async context manager exit: closes the HTTP client."""
        await self.close()

    @staticmethod
    def _parse_retry_after(value: str | None) -> int | None:
        """Convert a ``Retry-After`` header value to a delay in seconds.

        The header may carry either a number of seconds or an HTTP-date.
        A bare ``int()`` raises ``ValueError`` on the latter (or on any
        malformed value), which would mask the real rate-limit error.

        Args:
            value: Raw header value, or None when the header is absent.

        Returns:
            Non-negative delay in seconds, or None when the header is
            missing, empty or unparseable.
        """
        if not value:
            return None
        text = value.strip()
        try:
            return max(0, int(text))
        except ValueError:
            pass

        # Fall back to the HTTP-date form.
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        try:
            when = parsedate_to_datetime(text)
        except (TypeError, ValueError):
            return None
        if when.tzinfo is None:
            when = when.replace(tzinfo=timezone.utc)
        remaining = (when - datetime.now(timezone.utc)).total_seconds()
        return max(0, int(remaining))

    def _backoff_delay(self, attempt: int) -> int:
        """Return the backoff delay for a zero-based attempt number.

        The index is clamped to the last entry so that raising
        ``MAX_RETRIES`` above ``len(RETRY_DELAYS)`` cannot cause an
        ``IndexError``.
        """
        delays = self.RETRY_DELAYS
        if not delays:
            return 0
        return delays[min(attempt, len(delays) - 1)]

    def _handle_error_response(self, response: httpx.Response) -> None:
        """Raise the ``AnysiteError`` subclass matching an error response.

        This method always raises; it never returns normally.

        Raises:
            AuthenticationError: status 401
            NotFoundError: status 404
            ValidationError: status 422
            RateLimitError: status 429
            ServerError: status 5xx
            AnysiteError: any other status
        """
        status_code = response.status_code

        # Best-effort extraction of error details: non-dict JSON and
        # non-JSON bodies are both wrapped under the "detail" key.
        try:
            raw = response.json()
            error_data = raw if isinstance(raw, dict) else {"detail": raw}
        except Exception:
            error_data = {"detail": response.text}

        if status_code == 401:
            raise AuthenticationError(details=error_data)

        if status_code == 404:
            # NotFoundError appends " not found" itself, so fall back to the
            # plain resource name rather than a full sentence when the body
            # carries no detail (avoids "Resource not found not found").
            detail = error_data.get("detail")
            raise NotFoundError(
                resource=str(detail) if detail else "Resource",
                details=error_data,
            )

        if status_code == 422:
            # Validation error - FastAPI format
            detail = error_data.get("detail", [])
            if isinstance(detail, list):
                raise ValidationError(errors=detail, details=error_data)
            raise ValidationError(message=str(detail), details=error_data)

        if status_code == 429:
            raise RateLimitError(
                retry_after=self._parse_retry_after(response.headers.get("Retry-After")),
                details=error_data,
            )

        if 500 <= status_code < 600:
            raise ServerError(status_code=status_code, details=error_data)

        # Generic error for other status codes
        detail = error_data.get("detail", f"Request failed with status {status_code}")
        raise AnysiteError(message=str(detail), details=error_data)

    async def _request_with_retry(
        self,
        method: str,
        endpoint: str,
        **kwargs: Any,
    ) -> httpx.Response:
        """Make a request with retry logic.

        Rate-limit (429) and server (5xx) responses, timeouts and network
        errors are retried up to ``MAX_RETRIES`` attempts in total. Every
        other non-success response is raised immediately.

        Args:
            method: HTTP method
            endpoint: API endpoint path
            **kwargs: Additional arguments for httpx

        Returns:
            Response object for a successful request

        Raises:
            Various AnysiteError subclasses on failure
        """
        client = await self._get_client()
        last_error: Exception | None = None

        for attempt in range(self.MAX_RETRIES):
            is_last_attempt = attempt >= self.MAX_RETRIES - 1

            try:
                response = await client.request(method, endpoint, **kwargs)
            except httpx.TimeoutException as e:
                last_error = e
                if is_last_attempt:
                    raise TimeoutError(timeout=self.timeout) from e
                await asyncio.sleep(self._backoff_delay(attempt))
                continue
            except httpx.NetworkError as e:
                last_error = e
                if is_last_attempt:
                    raise NetworkError(original_error=e) from e
                await asyncio.sleep(self._backoff_delay(attempt))
                continue

            if response.is_success:
                return response

            status = response.status_code
            retryable = status == 429 or status >= 500

            # Non-retryable statuses (including unexpected 3xx) and exhausted
            # retries are converted to an exception right away. The handler
            # always raises, so nothing below runs in that case.
            if not retryable or is_last_attempt:
                self._handle_error_response(response)

            wait_time: int | None = None
            if status == 429:
                # Honour the server-provided delay when it is usable.
                wait_time = self._parse_retry_after(response.headers.get("Retry-After"))
            if wait_time is None:
                wait_time = self._backoff_delay(attempt)
            await asyncio.sleep(wait_time)

        # Only reachable when MAX_RETRIES is configured as 0 or less.
        raise NetworkError(
            message="Request failed after multiple retries",
            original_error=last_error,
        )

    async def post(
        self,
        endpoint: str,
        data: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Make a POST request to the API.

        All Anysite API endpoints use POST with JSON body.

        Args:
            endpoint: API endpoint path (e.g., '/api/linkedin/user')
            data: Request body as dict; an empty object is sent when omitted

        Returns:
            Response data as list of dicts (API always returns arrays)
        """
        response = await self._request_with_retry(
            "POST",
            endpoint,
            json=data or {},
        )
        return response.json()  # type: ignore[no-any-return]

    async def get(
        self,
        endpoint: str,
        params: dict[str, Any] | None = None,
    ) -> Any:
        """Make a GET request to the API.

        Args:
            endpoint: API endpoint path
            params: Query parameters

        Returns:
            Decoded JSON response data
        """
        response = await self._request_with_retry(
            "GET",
            endpoint,
            params=params,
        )
        return response.json()
246
+
247
+
248
def create_client(
    api_key: str | None = None,
    base_url: str | None = None,
    timeout: int | None = None,
) -> AnysiteClient:
    """Build an ``AnysiteClient``, filling gaps from the loaded settings.

    Any argument that is omitted (or falsy) is replaced by the attribute of
    the same name on the object returned by ``get_settings()``.

    Args:
        api_key: API key (falls back to settings)
        base_url: Base URL (falls back to settings)
        timeout: Timeout in seconds (falls back to settings)

    Returns:
        Configured AnysiteClient instance
    """
    # Imported at call time rather than at module import time.
    from anysite.config import get_settings

    cfg = get_settings()

    resolved_key = api_key or cfg.api_key
    resolved_url = base_url or cfg.base_url
    resolved_timeout = timeout or cfg.timeout

    return AnysiteClient(
        api_key=resolved_key,
        base_url=resolved_url,
        timeout=resolved_timeout,
    )
anysite/api/errors.py ADDED
@@ -0,0 +1,137 @@
1
+ """API error classes with helpful messages."""
2
+
3
+ from typing import Any
4
+
5
+
6
+ class AnysiteError(Exception):
7
+ """Base exception for Anysite API errors."""
8
+
9
+ def __init__(self, message: str, details: dict[str, Any] | None = None) -> None:
10
+ self.message = message
11
+ self.details = details or {}
12
+ super().__init__(message)
13
+
14
+ def __str__(self) -> str:
15
+ return self.message
16
+
17
+
18
class AuthenticationError(AnysiteError):
    """Raised when API authentication fails (401)."""

    def __init__(self, message: str | None = None, details: dict[str, Any] | None = None) -> None:
        # Multi-line help text used whenever the caller passes no message
        # (or an empty one).
        # NOTE(review): this listing strips leading whitespace, so any
        # indentation inside the original message text cannot be confirmed
        # from here — verify against the packaged file.
        default_message = """Authentication failed

Your API key is invalid or expired.

To fix this:
1. Get your API key at https://app.anysite.io/
2. Set it with: anysite config set api_key <your-key>

Or set environment variable:
export ANYSITE_API_KEY=sk-xxxxx"""

        super().__init__(message or default_message, details)
34
+
35
+
36
class RateLimitError(AnysiteError):
    """Raised when rate limit is exceeded (429).

    Attributes:
        retry_after: The delay in seconds passed by the caller, or None.
    """

    def __init__(
        self,
        message: str | None = None,
        details: dict[str, Any] | None = None,
        retry_after: int | None = None,
    ) -> None:
        self.retry_after = retry_after

        # Build the fallback text line by line; the retry hint is only
        # appended for a truthy delay.
        fallback_lines = ["Rate limit exceeded. Please wait before making more requests."]
        if retry_after:
            fallback_lines.append(f"Retry after: {retry_after} seconds")

        super().__init__(message or "\n".join(fallback_lines), details)
50
+
51
+
52
class NotFoundError(AnysiteError):
    """Raised when a resource is not found (404).

    Attributes:
        resource: Label used at the start of the message.
        identifier: Optional identifier quoted in the message when truthy.
    """

    def __init__(
        self,
        resource: str = "Resource",
        identifier: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.resource = resource
        self.identifier = identifier

        if identifier:
            text = f"{resource} '{identifier}' not found"
        else:
            text = f"{resource} not found"

        super().__init__(text, details)
67
+
68
+
69
class ValidationError(AnysiteError):
    """Raised when request validation fails (400/422).

    Attributes:
        errors: The list of error entries supplied by the caller (empty
            list when none were given).
    """

    def __init__(
        self,
        message: str | None = None,
        errors: list[dict[str, Any]] | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Build the exception, deriving a message from ``errors`` if needed.

        Args:
            message: Explicit message; when falsy, one is generated.
            errors: Error entries, normally dicts with ``loc`` and ``msg``
                keys. Entries of any other shape are rendered with ``str()``
                instead of crashing.
            details: Raw error payload kept on the exception.
        """
        self.errors = errors or []
        default_message = "Validation error"
        if errors:
            error_msgs = [self._format_entry(error) for error in errors]
            default_message = "Validation errors:\n" + "\n".join(error_msgs)
        super().__init__(message or default_message, details)

    @staticmethod
    def _format_entry(error: Any) -> str:
        """Render one error entry as a bullet line."""
        # The list comes straight from a server response, so entries are not
        # guaranteed to be dicts; fall back to their string form.
        if not isinstance(error, dict):
            return f" - {error}"

        raw_loc = error.get("loc") or ()
        if isinstance(raw_loc, str):
            # A plain string must not be split into single characters.
            loc = raw_loc
        elif isinstance(raw_loc, (list, tuple)):
            loc = ".".join(str(part) for part in raw_loc)
        else:
            loc = str(raw_loc)

        msg = error.get("msg", "Invalid value")
        if loc:
            return f" - {loc}: {msg}"
        return f" - {msg}"
91
+
92
+
93
class ServerError(AnysiteError):
    """Raised when API returns a server error (5xx).

    Attributes:
        status_code: The HTTP status code passed by the caller.
    """

    def __init__(
        self,
        message: str | None = None,
        status_code: int = 500,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.status_code = status_code
        fallback = f"Server error ({status_code}). Please try again later."
        text = message if message else fallback
        super().__init__(text, details)
105
+
106
+
107
class NetworkError(AnysiteError):
    """Raised when a network error occurs.

    Attributes:
        original_error: The underlying exception, if one was supplied.
    """

    def __init__(
        self,
        message: str | None = None,
        original_error: Exception | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.original_error = original_error

        # The underlying error text is appended to the fallback message only.
        suffix = f"\nDetails: {original_error}" if original_error else ""
        fallback = "Network error. Please check your internet connection." + suffix

        super().__init__(message or fallback, details)
121
+
122
+
123
class TimeoutError(AnysiteError):
    """Raised when a request times out.

    Attributes:
        timeout: The timeout in seconds passed by the caller, or None.
    """

    def __init__(
        self,
        message: str | None = None,
        timeout: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.timeout = timeout

        if timeout:
            headline = f"Request timed out after {timeout} seconds."
        else:
            headline = "Request timed out."
        # NOTE(review): the hint is assumed to be appended unconditionally;
        # the listing this was taken from does not preserve indentation.
        fallback = headline + "\nTry increasing the timeout with --timeout option."

        super().__init__(message or fallback, details)