ModelMetre 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmcost/__init__.py ADDED
@@ -0,0 +1,39 @@
1
+ #LLM Cost Observability SDK.
2
+
3
+ from .client import CostAnalyticsClient,AuthenticationError
4
+ from .sdk import CostAnalyticsSDK, get_sdk
5
+ from .pricing import (
6
+ CostExtractor,
7
+ get_extractor,
8
+ RequestDetailsBuffer,
9
+ RequestDetails,
10
+ get_request_buffer,
11
+ get_cost_aggregator,
12
+ CostInterceptor,
13
+ wrap_custom_client,
14
+ FLUSH_BATCH_SIZE,
15
+ FLUSH_INTERVAL_SECONDS,
16
+ )
17
+
18
+ __version__ = "0.1.0"
19
+ __all__ = [
20
+ # Authenticated analytics client
21
+ "CostAnalyticsClient",
22
+ "AuthenticationError",
23
+ # Main SDK
24
+ "CostAnalyticsSDK",
25
+ "get_sdk",
26
+ # Extractors
27
+ "CostExtractor",
28
+ "get_extractor",
29
+ # Buffer (replaces aggregator)
30
+ "RequestDetailsBuffer",
31
+ "RequestDetails",
32
+ "get_request_buffer",
33
+ "get_cost_aggregator",
34
+ "FLUSH_BATCH_SIZE",
35
+ "FLUSH_INTERVAL_SECONDS",
36
+ # Interceptor
37
+ "CostInterceptor",
38
+ "wrap_custom_client",
39
+ ]
@@ -0,0 +1,10 @@
1
+ from .routes import AUTH_VERIFY_PATH, CUSTOM_PRICING_PATH, TELEMETRY_FLUSH_PATH
2
+ from .telemetry import DEFAULT_TELEMETRY_PATH, TelemetryClient
3
+
4
+ __all__ = [
5
+ "AUTH_VERIFY_PATH",
6
+ "CUSTOM_PRICING_PATH",
7
+ "DEFAULT_TELEMETRY_PATH",
8
+ "TELEMETRY_FLUSH_PATH",
9
+ "TelemetryClient",
10
+ ]
llmcost/api/routes.py ADDED
@@ -0,0 +1,11 @@
1
+ """Backend API route constants used by the client-side SDK."""
2
+
3
+ AUTH_VERIFY_PATH = "/v1/auth/verify"
4
+ TELEMETRY_FLUSH_PATH = "/v1/telemetry/flush"
5
+ CUSTOM_PRICING_PATH = "/v1/pricing/custom"
6
+
7
+ __all__ = [
8
+ "AUTH_VERIFY_PATH",
9
+ "CUSTOM_PRICING_PATH",
10
+ "TELEMETRY_FLUSH_PATH",
11
+ ]
@@ -0,0 +1,106 @@
1
+ """Backend telemetry API routes for flushed SDK request batches."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import uuid
7
+ from typing import List, Optional
8
+
9
+ import requests
10
+
11
+ from .routes import TELEMETRY_FLUSH_PATH
12
+ from ..pricing.aggregator import RequestDetails
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ DEFAULT_TELEMETRY_PATH = TELEMETRY_FLUSH_PATH
17
+ MAX_FAILED_BATCHES = 5
18
+
19
+
20
class TelemetryClient:
    """Deliver flushed request-detail batches from the SDK buffer to the backend.

    Batches that cannot be delivered are kept in memory (at most
    ``MAX_FAILED_BATCHES`` of them) and are re-sent ahead of the next batch
    handed to :meth:`flush_batch`.
    """

    def __init__(
        self,
        server_url: str,
        endpoint: Optional[str] = None,
        *,
        api_key: Optional[str] = None,
        client_id: Optional[str] = None,
        timeout: float = 30.0,
        telemetry_path: Optional[str] = None,
        session: Optional[requests.Session] = None,
    ) -> None:
        """Configure the sender; no network traffic happens here.

        Args:
            server_url: Backend base URL; trailing slashes are removed.
            endpoint: Route used when ``telemetry_path`` is not given.
            api_key: Optional bearer token added to every flush request.
            client_id: Identifier sent with each batch; a random UUID4 string
                is generated when omitted.
            timeout: Per-request timeout passed to the HTTP session.
            telemetry_path: Flush route; takes precedence over ``endpoint``.
            session: HTTP session to use; a new ``requests.Session`` is
                created when omitted.
        """

        self.server_url = server_url.rstrip("/")
        self.api_key = api_key
        self.client_id = client_id or str(uuid.uuid4())
        self.timeout = timeout
        # Precedence: explicit telemetry_path, then endpoint, then the default route.
        self.telemetry_path = telemetry_path or endpoint or DEFAULT_TELEMETRY_PATH
        self.session = session or requests.Session()
        self._failed_batches: List[List[RequestDetails]] = []

    @property
    def flush_url(self) -> str:
        """Absolute URL that telemetry batches are POSTed to."""

        route = self.telemetry_path.lstrip("/")
        return f"{self.server_url}/{route}"

    def flush_batch(self, batch: List[RequestDetails]) -> None:
        """Send ``batch`` to the backend, preceded by any previously failed batches.

        An empty ``batch`` is a no-op (retained batches are not re-sent either).
        Delivery errors are logged and the affected batch is retained; they are
        never raised to the caller.
        """

        if not batch:
            return

        # Take ownership of the backlog so that failures during this pass are
        # re-queued from a clean slate.
        queue = [*self._failed_batches, batch]
        self._failed_batches = []

        for item in queue:
            failure = self._attempt_delivery(item)
            if failure is None:
                continue
            logger.warning("Telemetry flush failed; retaining batch: %s", failure)
            self._retain_failed_batch(item)

    def _attempt_delivery(self, batch: List[RequestDetails]) -> Optional[Exception]:
        """POST ``batch``, retrying once if the first error looks like non-receipt.

        Returns ``None`` on success, otherwise the exception that ended the attempt.
        """

        try:
            self._post_batch(batch)
            return None
        except Exception as first_error:
            if not self._looks_not_received(first_error):
                return first_error
            try:
                self._post_batch(batch)
                return None
            except Exception as retry_error:
                return retry_error

    def _post_batch(self, batch: List[RequestDetails]) -> None:
        """Serialize ``batch`` and POST it; raises on transport or HTTP errors."""

        body = {
            "client_id": self.client_id,
            "batch": [details.to_dict() for details in batch],
        }
        reply = self.session.post(
            self.flush_url,
            json=body,
            headers=self._headers(),
            timeout=self.timeout,
        )
        reply.raise_for_status()

    def _headers(self) -> dict:
        """Build the request headers, adding a bearer token when an API key is set."""

        result = {
            "Content-Type": "application/json",
            "X-Client-ID": self.client_id,
        }
        if not self.api_key:
            return result
        result["Authorization"] = f"Bearer {self.api_key}"
        return result

    def _retain_failed_batch(self, batch: List[RequestDetails]) -> None:
        """Queue ``batch`` for a later retry, keeping only the newest entries."""

        self._failed_batches.append(batch)
        if len(self._failed_batches) <= MAX_FAILED_BATCHES:
            return
        # Oldest batches are dropped once the backlog exceeds the cap.
        self._failed_batches = self._failed_batches[-MAX_FAILED_BATCHES:]

    def _looks_not_received(self, exc: Exception) -> bool:
        """Return True for connection errors or messages containing "not received"."""

        text = str(exc).lower()
        if isinstance(exc, requests.ConnectionError):
            return True
        return "not received" in text

    def close(self) -> None:
        """Close the HTTP session held by this client."""

        self.session.close()
@@ -0,0 +1 @@
1
+ """Auth helpers for API-key based SDK access."""
llmcost/auth/config.py ADDED
@@ -0,0 +1,24 @@
1
+ # Auth configuration helpers for API-key only setup.
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+
8
class ConfigError(RuntimeError):
    """Signal that required auth configuration is missing or invalid."""
11
+
12
+
13
def get_api_key() -> str:
    """Read the SDK API key from the ``CA_API_KEY`` environment variable.

    Returns:
        The key with surrounding whitespace removed.

    Raises:
        ConfigError: If the variable is unset, or is empty after stripping.
    """

    try:
        raw_value = os.environ["CA_API_KEY"]
    except KeyError as missing:
        raise ConfigError("CA_API_KEY is required in the environment") from missing

    cleaned = raw_value.strip()
    if cleaned:
        return cleaned
    raise ConfigError("CA_API_KEY cannot be empty")
llmcost/client.py ADDED
@@ -0,0 +1,208 @@
1
+ #Client-side analytics API client with lazy API-key authentication.
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import time
7
+ import uuid
8
+ from dataclasses import dataclass
9
+ from typing import Any, Callable, Dict, Optional
10
+
11
+ import requests
12
+
13
+ from .api.routes import AUTH_VERIFY_PATH, CUSTOM_PRICING_PATH
14
+ from .auth.config import ConfigError, get_api_key
15
+
16
+
17
+ DEFAULT_BASE_URL = "http://localhost:8000"
18
+ DEFAULT_AUTH_PATH = AUTH_VERIFY_PATH
19
+
20
+
21
@dataclass(frozen=True)
class AuthContext:
    """Immutable identity produced by a successful API-key verification."""

    # Taken from the ``user_id`` field of the auth-verify response.
    user_id: str
    # Taken from the ``api_key_id`` field of the auth-verify response.
    api_key_id: str
26
+
27
+
28
class AuthenticationError(RuntimeError):
    """Raised when API-key authentication fails or is missing.

    ``CostAnalyticsClient`` raises this when no API key can be resolved, when
    the key does not have the expected prefix, when the backend answers 401 or
    403, and when the verification response lacks the identity fields.
    """
30
+
31
+
32
class CostAnalyticsClient:
    """HTTP client for the cost-analytics backend that authenticates lazily.

    Constructing the client performs no network I/O. The API key is verified
    against ``auth_path`` the first time request headers are built, and the
    resulting identity is cached on the instance afterwards.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: float = 30.0,
        session: Optional[requests.Session] = None,
        auth_path: str = DEFAULT_AUTH_PATH,
        max_retries: int = 3,
        backoff_factor: float = 0.5,
        request_id_factory: Optional[Callable[[], str]] = None,
    ) -> None:
        """Initialize the client without forcing network validation.

        Args:
            api_key: SDK API key; read from ``CA_API_KEY`` when omitted.
            base_url: Backend base URL; falls back to ``CA_API_BASE_URL`` and
                then to ``DEFAULT_BASE_URL``. Trailing slashes are removed.
            timeout: Per-request timeout passed to the HTTP session.
            session: HTTP session to use; a new ``requests.Session`` is
                created when omitted.
            auth_path: Route used to verify the API key.
            max_retries: Total number of attempts per request (minimum 1).
            backoff_factor: Base delay in seconds for exponential backoff
                between attempts.
            request_id_factory: Callable producing request ids; defaults to
                random UUID4 strings.

        Raises:
            AuthenticationError: If no API key is given and none can be read
                from the environment.
        """

        try:
            self.api_key = api_key or get_api_key()
        except ConfigError as exc:
            raise AuthenticationError(str(exc)) from exc
        self.base_url = (base_url or os.getenv("CA_API_BASE_URL", DEFAULT_BASE_URL)).rstrip("/")
        self.timeout = timeout
        self.session = session or requests.Session()
        self.auth_path = auth_path
        self.max_retries = max(1, max_retries)
        self.backoff_factor = backoff_factor
        self._auth_context: Optional[AuthContext] = None
        self._authenticated = False
        self._request_id_factory = request_id_factory or (lambda: str(uuid.uuid4()))

    def _ensure_authenticated(self) -> AuthContext:
        """Validate the API key on first use and cache the resulting identity.

        Returns:
            The cached ``AuthContext`` for this client.

        Raises:
            AuthenticationError: If the key is missing, lacks the ``ca_live_``
                prefix, is rejected with 401/403, or the response does not
                contain ``user_id`` and ``api_key_id``.
            requests.HTTPError: For other 4xx/5xx verification responses.
        """

        if self._authenticated and self._auth_context is not None:
            return self._auth_context

        if not self.api_key:
            raise AuthenticationError("CA_API_KEY is required")

        if not self.api_key.startswith("ca_live_"):
            raise AuthenticationError("Invalid API key format")

        # Join exactly like request() does so that an auth_path supplied
        # without a leading slash still produces a well-formed URL.
        url = f"{self.base_url}/{self.auth_path.lstrip('/')}"
        response = self.session.get(
            url,
            headers={"Authorization": f"Bearer {self.api_key}"},
            timeout=self.timeout,
        )

        if response.status_code in (401, 403):
            # Never retry auth failures; they are not transient.
            self._raise_auth_failure(response)

        response.raise_for_status()
        payload = self._safe_json(response)

        user_id = payload.get("user_id")
        api_key_id = payload.get("api_key_id")
        if not user_id or not api_key_id:
            raise AuthenticationError("Authentication response missing identity fields")

        self._auth_context = AuthContext(user_id=str(user_id), api_key_id=str(api_key_id))
        self._authenticated = True
        return self._auth_context

    def _raise_auth_failure(self, response: requests.Response) -> None:
        """Raise ``AuthenticationError`` carrying the server's ``error`` value.

        Falls back to ``"invalid_api_key"`` when the body is not a JSON object
        or its ``error`` field is absent, null, or empty.
        """

        error_payload = self._safe_json(response)
        raise AuthenticationError(error_payload.get("error") or "invalid_api_key")

    def _safe_json(self, response: requests.Response) -> Dict[str, Any]:
        """Return the response body as a dict, or ``{}`` if it is not a JSON object.

        Parsing failures are swallowed so raw response bodies never end up in
        exception traces.
        """

        try:
            data = response.json()
        except ValueError:
            return {}
        return data if isinstance(data, dict) else {}

    def _request_headers(
        self,
        *,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        request_id: Optional[str] = None,
    ) -> Dict[str, str]:
        """Build the headers attached to every analytics call.

        Triggers API-key verification on first use. Missing ``provider`` and
        ``model`` are sent as empty strings; a missing ``request_id`` is
        generated by the request-id factory.
        """

        auth_context = self._ensure_authenticated()
        return {
            "Authorization": f"Bearer {self.api_key}",
            "X-CA-Key-Id": auth_context.api_key_id,
            "X-CA-User-Id": auth_context.user_id,
            "X-Request-Id": request_id or self._request_id_factory(),
            "X-CA-Provider": provider or "",
            "X-CA-Model": model or "",
        }

    def request(
        self,
        method: str,
        path: str,
        *,
        json: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        request_id: Optional[str] = None,
    ) -> requests.Response:
        """Send an authenticated request, retrying only on 5xx responses.

        The same headers (including the request id) are reused for every
        attempt. Exceptions raised by the session itself are not caught.

        Returns:
            The response for any status below 500 other than 401/403.

        Raises:
            AuthenticationError: On a 401 or 403 response, without retrying.
            requests.HTTPError: When the final attempt still returns 5xx.
        """

        url = f"{self.base_url}/{path.lstrip('/')}"
        headers = self._request_headers(provider=provider, model=model, request_id=request_id)
        last_attempt = self.max_retries - 1

        for attempt in range(self.max_retries):
            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                json=json,
                params=params,
                timeout=self.timeout,
            )

            if response.status_code in (401, 403):
                # Authentication and authorization errors must fail fast.
                self._raise_auth_failure(response)

            if response.status_code < 500:
                return response

            if attempt == last_attempt:
                response.raise_for_status()

            # Exponential backoff: backoff_factor, 2x, 4x, ...
            time.sleep(self.backoff_factor * (2**attempt))

        raise RuntimeError("Request retry loop exited unexpectedly")

    def submit_custom_pricing(
        self,
        *,
        model: str,
        provider: str,
        input_cost_per_1m_tokens: float,
        output_cost_per_1m_tokens: float,
        cache_creation_cost_per_1m_tokens: Optional[float] = None,
        cache_read_cost_per_1m_tokens: Optional[float] = None,
        source: Optional[str] = None,
        currency: str = "USD",
        path: str = CUSTOM_PRICING_PATH,
    ) -> requests.Response:
        """POST client-supplied pricing data for a model to the backend.

        Optional fields (cache costs and ``source``) are included in the
        payload only when they are not ``None``.

        Returns:
            The backend response, as returned by :meth:`request`.
        """

        payload: Dict[str, Any] = {
            "model": model,
            "provider": provider,
            "input_cost_per_1m_tokens": input_cost_per_1m_tokens,
            "output_cost_per_1m_tokens": output_cost_per_1m_tokens,
            "currency": currency,
        }
        if cache_creation_cost_per_1m_tokens is not None:
            payload["cache_creation_cost_per_1m_tokens"] = cache_creation_cost_per_1m_tokens
        if cache_read_cost_per_1m_tokens is not None:
            payload["cache_read_cost_per_1m_tokens"] = cache_read_cost_per_1m_tokens
        if source is not None:
            payload["source"] = source

        return self.request(
            "POST",
            path,
            json=payload,
            provider=provider,
            model=model,
        )

    def close(self) -> None:
        """Close the underlying HTTP session."""

        self.session.close()
208
+
llmcost/constants.py ADDED
File without changes
llmcost/errors.py ADDED
File without changes
File without changes
File without changes
@@ -0,0 +1,33 @@
1
+ # _internal/middleware/rate_limit.py
2
+
3
+ import time
4
+ import threading
5
+
6
class TokenBucket:
    """Token-bucket limiter whose refill-and-consume step runs under a lock."""

    def __init__(self, capacity: int, refill_rate: float):
        """
        capacity    : maximum number of tokens held, i.e. the largest burst allowed
        refill_rate : tokens added per second (0.1 gives one token every 10 seconds)
        """
        self._lock = threading.Lock()
        self._last = time.monotonic()
        self.capacity = capacity
        self.refill_rate = refill_rate
        self.tokens = capacity  # the bucket starts full

    def acquire(self) -> bool:
        """Take one token if available; return whether the caller may proceed."""
        with self._lock:
            current = time.monotonic()
            gained = (current - self._last) * self.refill_rate
            self._last = current

            # Top up for the time elapsed since the last call, capped at capacity.
            self.tokens = min(self.capacity, self.tokens + gained)

            has_token = self.tokens >= 1
            if has_token:
                self.tokens -= 1
            return has_token
File without changes
File without changes
@@ -0,0 +1,41 @@
1
+ #Cost analytics and pricing module.
2
+
3
+ from .extractors import (
4
+ UsageBreakdown,
5
+ UsageExtractor,
6
+ Extractor,
7
+ get_extractor,
8
+ )
9
+ from .aggregator import (
10
+ RequestDetailsBuffer,
11
+ RequestDetails,
12
+ get_request_buffer,
13
+ get_cost_aggregator,
14
+ FLUSH_BATCH_SIZE,
15
+ FLUSH_INTERVAL_SECONDS,
16
+ )
17
+ from .interceptor import (
18
+ CostInterceptor,
19
+ wrap_custom_client,
20
+ )
21
+
22
+ CostExtractor = UsageExtractor
23
+
24
+ __all__ = [
25
+ # Extractors
26
+ "CostExtractor",
27
+ "UsageBreakdown",
28
+ "UsageExtractor",
29
+ "Extractor",
30
+ "get_extractor",
31
+ # Buffer (replaces aggregator)
32
+ "RequestDetailsBuffer",
33
+ "RequestDetails",
34
+ "get_request_buffer",
35
+ "get_cost_aggregator",
36
+ "FLUSH_BATCH_SIZE",
37
+ "FLUSH_INTERVAL_SECONDS",
38
+ # Interceptor
39
+ "CostInterceptor",
40
+ "wrap_custom_client",
41
+ ]