mcp-data-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
"""Shared HTTP and MCP scaffolding for consumer libraries.

Provides the infrastructure that consumers build on:

- Exception hierarchy for API errors (``McpDataCoreError`` and subclasses)
- ``BaseAsyncClient`` with caching and retry support
- HTTP caching utilities (``CacheManager``, ``build_cached_http_client``)
- Resilience utilities (``default_retryer``, ``with_retry``)
- File-based logging configured per consumer app (``configure``)
- Response envelope types and cursor helpers (``ResponseEnvelope``,
  ``ListEnvelope``, ``Provenance``, ``encode_cursor``, ``decode_cursor``)
- ``OAuth2ClientCredentialsAuth``, re-exported from ``.oauth2``

Every name imported below is re-exported through ``__all__``.
"""

from .base_client import BaseAsyncClient
from .cache import CacheManager, CacheStats, build_cached_http_client
from .envelope import (
    ListEnvelope,
    Provenance,
    ResponseEnvelope,
    decode_cursor,
    encode_cursor,
    make_provenance,
)
# Aliased so it does not collide with ``configure`` imported from ``.logging``
# further down; both submodules expose a function with that name.
from .envelope import configure as configure_envelope
from .exceptions import (
    ApiError,
    AuthenticationError,
    ConfigurationError,
    McpDataCoreError,
    NotFoundError,
    ParseError,
    RateLimitError,
    ServerError,
    ValidationError,
)
from .logging import configure, log_file_hint
from .oauth2 import OAuth2ClientCredentialsAuth
from .resilience import (
    RETRYABLE_STATUS_CODES,
    default_retryer,
    is_retryable_error,
    with_retry,
)

# Public API of the package, grouped by the submodule each name comes from.
__all__ = [
    # Base client (.base_client)
    "BaseAsyncClient",
    # Caching (.cache)
    "build_cached_http_client",
    "CacheManager",
    "CacheStats",
    # Envelope (.envelope); ``configure_envelope`` is ``.envelope.configure``
    "Provenance",
    "ResponseEnvelope",
    "ListEnvelope",
    "configure_envelope",
    "make_provenance",
    "encode_cursor",
    "decode_cursor",
    # Exceptions (.exceptions)
    "McpDataCoreError",
    "ApiError",
    "NotFoundError",
    "RateLimitError",
    "AuthenticationError",
    "ServerError",
    "ValidationError",
    "ConfigurationError",
    "ParseError",
    # Logging (.logging); ``configure`` here is the logging one
    "configure",
    "log_file_hint",
    # OAuth2 (.oauth2)
    "OAuth2ClientCredentialsAuth",
    # Resilience (.resilience)
    "RETRYABLE_STATUS_CODES",
    "is_retryable_error",
    "default_retryer",
    "with_retry",
]
@@ -0,0 +1,348 @@
1
+ """Base async HTTP client with standardized patterns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import logging
7
+ from datetime import timedelta
8
+ from pathlib import Path
9
+ from typing import Any, Self
10
+
11
+ import httpx
12
+
13
+ from .cache import CacheManager, CacheStats, build_cached_http_client, get_default_cache_dir
14
+ from .exceptions import (
15
+ ApiError,
16
+ AuthenticationError,
17
+ NotFoundError,
18
+ RateLimitError,
19
+ ServerError,
20
+ )
21
+ from .resilience import default_retryer
22
+
23
# Module-level logger named after this module; used by
# ``BaseAsyncClient._raise_for_status`` to record failed responses.
logger = logging.getLogger(__name__)
24
+
25
+
26
+ class BaseAsyncClient:
27
+ """Base class for async API clients with caching and retry support.
28
+
29
+ Subclasses should override:
30
+ - DEFAULT_BASE_URL: The default API base URL
31
+ - CACHE_NAME: Name for the cache database file
32
+
33
+ Example:
34
+ class MyApiClient(BaseAsyncClient):
35
+ DEFAULT_BASE_URL = "https://api.example.com"
36
+ CACHE_NAME = "my_api"
37
+
38
+ async def get_resource(self, id: str) -> dict:
39
+ return await self._request_json("GET", f"/resources/{id}")
40
+
41
+ Cache Management:
42
+ async with MyApiClient() as client:
43
+ # Make requests...
44
+ result = await client.get_resource("123")
45
+
46
+ # Get cache statistics
47
+ stats = await client.cache_stats()
48
+ print(f"Hit rate: {stats.hit_rate:.1f}%")
49
+ print(f"Cache size: {stats.size_mb:.2f} MB")
50
+
51
+ # Clear all cached data
52
+ cleared = await client.cache_clear()
53
+ print(f"Cleared {cleared} entries")
54
+
55
+ # Clear entries older than 1 hour
56
+ cleared = await client.cache_clear_expired(max_age=timedelta(hours=1))
57
+
58
+ # Invalidate specific URLs by pattern
59
+ cleared = await client.cache_invalidate(r"/resources/123")
60
+ """
61
+
62
+ DEFAULT_BASE_URL: str = ""
63
+ CACHE_NAME: str = "default"
64
+ DEFAULT_TIMEOUT: float = 30.0
65
+ HTTP2: bool = False
66
+
67
+ def __init__(
68
+ self,
69
+ *,
70
+ base_url: str | None = None,
71
+ cache_path: Path | None = None,
72
+ client: httpx.AsyncClient | None = None,
73
+ use_cache: bool = True,
74
+ ttl_seconds: int | None = None,
75
+ max_retries: int = 4,
76
+ headers: dict[str, str] | None = None,
77
+ timeout: float | None = None,
78
+ auth: httpx.Auth | None = None,
79
+ http2: bool | None = None,
80
+ ) -> None:
81
+ """Initialize the client.
82
+
83
+ Args:
84
+ base_url: Override the default base URL.
85
+ cache_path: Custom path for the cache directory.
86
+ client: Existing httpx.AsyncClient to use (for testing).
87
+ use_cache: Whether to enable HTTP caching.
88
+ ttl_seconds: Default TTL for cache entries. None uses HTTP headers.
89
+ max_retries: Maximum retry attempts for transient failures.
90
+ headers: Additional headers to include in requests.
91
+ timeout: Request timeout in seconds (defaults to ``DEFAULT_TIMEOUT``).
92
+ auth: httpx Auth handler (e.g. for OAuth2 token refresh).
93
+ http2: Enable HTTP/2 for the underlying client. Falls back to
94
+ the subclass ``HTTP2`` class attribute. Some upstream APIs
95
+ (e.g. api.publicrecords.copyright.gov) reject HTTP/1.1.
96
+ """
97
+ self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
98
+ self._owns_client = client is None
99
+ self._max_retries = max_retries
100
+ self._timeout = timeout or self.DEFAULT_TIMEOUT
101
+ self._cache_manager: CacheManager | None = None
102
+ resolved_http2 = self.HTTP2 if http2 is None else http2
103
+
104
+ if client is None:
105
+ cache_dir = cache_path or get_default_cache_dir()
106
+ cache_dir.mkdir(parents=True, exist_ok=True)
107
+ self._client, self._cache_manager = build_cached_http_client(
108
+ use_cache=use_cache,
109
+ cache_name=self.CACHE_NAME,
110
+ cache_dir=cache_dir,
111
+ ttl_seconds=ttl_seconds,
112
+ headers=headers or {},
113
+ follow_redirects=True,
114
+ timeout=self._timeout,
115
+ auth=auth,
116
+ http2=resolved_http2,
117
+ )
118
+ else:
119
+ self._client = client
120
+ if headers:
121
+ for key, value in headers.items():
122
+ self._client.headers.setdefault(key, value)
123
+
124
+ @property
125
+ def cache_enabled(self) -> bool:
126
+ """Check if caching is enabled."""
127
+ return self._cache_manager is not None
128
+
129
+ async def cache_stats(self) -> CacheStats:
130
+ """Get cache statistics.
131
+
132
+ Returns:
133
+ CacheStats with hits, misses, entry count, and size.
134
+
135
+ Raises:
136
+ RuntimeError: If caching is disabled.
137
+ """
138
+ if self._cache_manager is None:
139
+ raise RuntimeError("Caching is disabled for this client")
140
+ return await self._cache_manager.get_stats()
141
+
142
+ async def cache_clear(self) -> int:
143
+ """Clear all cache entries.
144
+
145
+ Returns:
146
+ Number of entries cleared.
147
+
148
+ Raises:
149
+ RuntimeError: If caching is disabled.
150
+ """
151
+ if self._cache_manager is None:
152
+ raise RuntimeError("Caching is disabled for this client")
153
+ return await self._cache_manager.clear_all()
154
+
155
+ async def cache_clear_expired(self, max_age: timedelta | None = None) -> int:
156
+ """Clear expired cache entries.
157
+
158
+ Args:
159
+ max_age: Maximum age for entries. Defaults to TTL or 24 hours.
160
+
161
+ Returns:
162
+ Number of entries cleared.
163
+
164
+ Raises:
165
+ RuntimeError: If caching is disabled.
166
+ """
167
+ if self._cache_manager is None:
168
+ raise RuntimeError("Caching is disabled for this client")
169
+ return await self._cache_manager.clear_expired(max_age)
170
+
171
+ async def cache_invalidate(self, url_pattern: str) -> int:
172
+ """Invalidate cache entries matching a URL pattern.
173
+
174
+ Args:
175
+ url_pattern: Regex pattern to match against cached URLs.
176
+
177
+ Returns:
178
+ Number of entries invalidated.
179
+
180
+ Raises:
181
+ RuntimeError: If caching is disabled.
182
+ """
183
+ if self._cache_manager is None:
184
+ raise RuntimeError("Caching is disabled for this client")
185
+ return await self._cache_manager.invalidate_pattern(url_pattern)
186
+
187
+ async def close(self) -> None:
188
+ """Close the underlying HTTP client if we own it."""
189
+ if self._owns_client:
190
+ await self._client.aclose()
191
+ if self._cache_manager is not None:
192
+ await self._cache_manager.close()
193
+
194
+ async def __aenter__(self) -> Self:
195
+ return self
196
+
197
+ async def __aexit__(self, *exc: object) -> None:
198
+ await self.close()
199
+
200
+ def _build_url(self, path: str) -> str:
201
+ """Build a full URL from a path."""
202
+ return f"{self.base_url}{path}"
203
+
204
+ def _raise_for_status(self, response: httpx.Response, context: str = "") -> None:
205
+ """Convert HTTP errors to typed exceptions.
206
+
207
+ Args:
208
+ response: The HTTP response to check.
209
+ context: Optional context string for error messages.
210
+
211
+ Raises:
212
+ NotFoundError: For 404 responses.
213
+ RateLimitError: For 429 responses.
214
+ AuthenticationError: For 401/403 responses.
215
+ ServerError: For 5xx responses.
216
+ ApiError: For other non-success responses.
217
+ """
218
+ if response.is_success:
219
+ return
220
+
221
+ status = response.status_code
222
+ body = response.text[:500] if response.text else ""
223
+ msg = f"{context}: HTTP {status}" if context else f"HTTP {status}"
224
+
225
+ # Log full response details to file for debugging
226
+ logger.error(
227
+ "%s %s -> %s\nResponse body: %s",
228
+ response.request.method,
229
+ response.request.url,
230
+ status,
231
+ body,
232
+ )
233
+
234
+ if status == 404:
235
+ raise NotFoundError(msg, status, body)
236
+ if status == 429:
237
+ retry_after: float | None = None
238
+ raw = response.headers.get("Retry-After")
239
+ if raw is not None:
240
+ with contextlib.suppress(ValueError):
241
+ retry_after = float(raw)
242
+ raise RateLimitError(msg, status, body, retry_after=retry_after)
243
+ if status in (401, 403):
244
+ raise AuthenticationError(msg, status, body)
245
+ if 500 <= status < 600:
246
+ raise ServerError(msg, status, body)
247
+ raise ApiError(msg, status, body)
248
+
249
+ async def _request(
250
+ self,
251
+ method: str,
252
+ path: str,
253
+ *,
254
+ params: dict[str, Any] | None = None,
255
+ json: dict[str, Any] | None = None,
256
+ data: dict[str, Any] | None = None,
257
+ content: bytes | None = None,
258
+ headers: dict[str, str] | None = None,
259
+ context: str = "",
260
+ timeout: float | None = None,
261
+ ) -> httpx.Response:
262
+ """Make an HTTP request with retry logic.
263
+
264
+ Args:
265
+ method: HTTP method (GET, POST, etc.).
266
+ path: URL path (will be appended to base_url).
267
+ params: Query parameters.
268
+ json: JSON body for POST/PUT requests.
269
+ data: Form-encoded body.
270
+ content: Raw bytes body.
271
+ headers: Per-request header overrides (merged on top of client headers).
272
+ context: Context string for error messages.
273
+ timeout: Optional per-request timeout in seconds.
274
+
275
+ Returns:
276
+ The HTTP response.
277
+
278
+ Raises:
279
+ ApiError: On non-retryable HTTP errors.
280
+ """
281
+ url = self._build_url(path)
282
+ request_kwargs: dict[str, Any] = {}
283
+ if params:
284
+ request_kwargs["params"] = params
285
+ if json is not None:
286
+ request_kwargs["json"] = json
287
+ if data is not None:
288
+ request_kwargs["data"] = data
289
+ if content is not None:
290
+ request_kwargs["content"] = content
291
+ if headers:
292
+ request_kwargs["headers"] = headers
293
+ if timeout:
294
+ request_kwargs["timeout"] = timeout
295
+
296
+ async for attempt in default_retryer(max_attempts=self._max_retries):
297
+ with attempt:
298
+ response = await self._client.request(method, url, **request_kwargs)
299
+ self._raise_for_status(response, context)
300
+ return response
301
+
302
+ # Should not reach here due to reraise=True in retryer
303
+ raise RuntimeError("Unexpected retry exhaustion")
304
+
305
+ async def _request_json(
306
+ self,
307
+ method: str,
308
+ path: str,
309
+ *,
310
+ params: dict[str, Any] | None = None,
311
+ json: dict[str, Any] | None = None,
312
+ data: dict[str, Any] | None = None,
313
+ content: bytes | None = None,
314
+ headers: dict[str, str] | None = None,
315
+ context: str = "",
316
+ timeout: float | None = None,
317
+ ) -> dict[str, Any]:
318
+ """Make an HTTP request and return JSON response.
319
+
320
+ Args:
321
+ method: HTTP method.
322
+ path: URL path.
323
+ params: Query parameters.
324
+ json: JSON body.
325
+ data: Form-encoded body.
326
+ content: Raw bytes body.
327
+ headers: Per-request header overrides.
328
+ context: Context string for error messages.
329
+ timeout: Optional per-request timeout.
330
+
331
+ Returns:
332
+ Parsed JSON response as a dictionary.
333
+ """
334
+ response = await self._request(
335
+ method,
336
+ path,
337
+ params=params,
338
+ json=json,
339
+ data=data,
340
+ content=content,
341
+ headers=headers,
342
+ context=context,
343
+ timeout=timeout,
344
+ )
345
+ return response.json()
346
+
347
+
348
# Explicit public API of this module: only the client class is exported.
__all__ = ["BaseAsyncClient"]