PyPI - ModelMetre - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ModelMetre 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

llmcost/__init__.py +39 -0
llmcost/api/__init__.py +10 -0
llmcost/api/routes.py +11 -0
llmcost/api/telemetry.py +106 -0
llmcost/auth/__init__.py +1 -0
llmcost/auth/config.py +24 -0
llmcost/client.py +208 -0
llmcost/constants.py +0 -0
llmcost/errors.py +0 -0
llmcost/middleware/__init__.py +0 -0
llmcost/middleware/error_handler.py +0 -0
llmcost/middleware/rate_limit.py +33 -0
llmcost/plugins/__init__.py +0 -0
llmcost/plugins/base_plugin.py +0 -0
llmcost/pricing/__init__.py +41 -0
llmcost/pricing/aggregator.py +228 -0
llmcost/pricing/extractors.py +122 -0
llmcost/pricing/interceptor.py +145 -0
llmcost/sdk.py +143 -0
llmcost/utils/__init__.py +0 -0
llmcost/utils/cache.py +0 -0
llmcost/utils/helpers.py +0 -0
llmcost/utils/http.py +0 -0
llmcost/utils/logger.py +0 -0
llmcost/utils/retry.py +0 -0
llmcost/utils/validation.py +0 -0
modelmetre-0.1.0.dist-info/METADATA +241 -0
modelmetre-0.1.0.dist-info/RECORD +30 -0
modelmetre-0.1.0.dist-info/WHEEL +5 -0
modelmetre-0.1.0.dist-info/top_level.txt +1 -0

llmcost/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+#LLM Cost Observability SDK.
+from .client import CostAnalyticsClient,AuthenticationError
+from .sdk import CostAnalyticsSDK, get_sdk
+from .pricing import (
+    CostExtractor,
+    get_extractor,
+    RequestDetailsBuffer,
+    RequestDetails,
+    get_request_buffer,
+    get_cost_aggregator,
+    CostInterceptor,
+    wrap_custom_client,
+    FLUSH_BATCH_SIZE,
+    FLUSH_INTERVAL_SECONDS,
+)
+# Version string of this package release.
+__version__ = "0.1.0"
+# Public API of the top-level package: every name imported above from
+# .client, .sdk and .pricing is re-exported here, grouped by area.
+__all__ = [
+    # Authenticated analytics client
+    "CostAnalyticsClient",
+    "AuthenticationError",
+    # Main SDK
+    "CostAnalyticsSDK",
+    "get_sdk",
+    # Extractors
+    "CostExtractor",
+    "get_extractor",
+    # Buffer (replaces aggregator); get_cost_aggregator is still exported,
+    # presumably for backward compatibility -- confirm in pricing/aggregator.py.
+    "RequestDetailsBuffer",
+    "RequestDetails",
+    "get_request_buffer",
+    "get_cost_aggregator",
+    "FLUSH_BATCH_SIZE",
+    "FLUSH_INTERVAL_SECONDS",
+    # Interceptor
+    "CostInterceptor",
+    "wrap_custom_client",
+]

llmcost/api/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from .routes import AUTH_VERIFY_PATH, CUSTOM_PRICING_PATH, TELEMETRY_FLUSH_PATH
+from .telemetry import DEFAULT_TELEMETRY_PATH, TelemetryClient
+# Public names of the api sub-package: the three route constants from .routes
+# plus the telemetry sender and its default path from .telemetry.
+__all__ = [
+    "AUTH_VERIFY_PATH",
+    "CUSTOM_PRICING_PATH",
+    "DEFAULT_TELEMETRY_PATH",
+    "TELEMETRY_FLUSH_PATH",
+    "TelemetryClient",
+]

llmcost/api/routes.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Backend API route constants used by the client-side SDK."""
+# Key-verification route; client.py uses it as the default auth path and calls it with GET.
+AUTH_VERIFY_PATH = "/v1/auth/verify"
+# Route that telemetry.py POSTs flushed request batches to by default.
+TELEMETRY_FLUSH_PATH = "/v1/telemetry/flush"
+# Default route that client.py POSTs client-supplied pricing data to.
+CUSTOM_PRICING_PATH = "/v1/pricing/custom"
+# All three routes are part of this module's public API.
+__all__ = [
+    "AUTH_VERIFY_PATH",
+    "CUSTOM_PRICING_PATH",
+    "TELEMETRY_FLUSH_PATH",
+]

llmcost/api/telemetry.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Backend telemetry API routes for flushed SDK request batches."""
+from __future__ import annotations
+import logging
+import uuid
+from typing import List, Optional
+import requests
+from .routes import TELEMETRY_FLUSH_PATH
+from ..pricing.aggregator import RequestDetails
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Route used when TelemetryClient is given neither telemetry_path nor endpoint.
+DEFAULT_TELEMETRY_PATH = TELEMETRY_FLUSH_PATH
+# Upper bound on failed batches kept for re-sending; older ones are discarded.
+MAX_FAILED_BATCHES = 5
+class TelemetryClient:
+    """HTTP sender that connects the client-side SDK buffer to the backend.
+
+    Batches that cannot be delivered are kept in memory (at most
+    ``MAX_FAILED_BATCHES``) and re-sent, oldest first, the next time
+    :meth:`flush_batch` is called with a non-empty batch.
+    """
+    def __init__(
+        self,
+        server_url: str,
+        endpoint: Optional[str] = None,
+        *,
+        api_key: Optional[str] = None,
+        client_id: Optional[str] = None,
+        timeout: float = 30.0,
+        telemetry_path: Optional[str] = None,
+        session: Optional[requests.Session] = None,
+    ) -> None:
+        """Store connection settings; no request is sent here.
+
+        Args:
+            server_url: Backend base URL; trailing slashes are stripped.
+            endpoint: Route used only when ``telemetry_path`` is not given.
+            api_key: When set, sent as an ``Authorization: Bearer`` header.
+            client_id: Sent with every batch; a random UUID4 string if omitted.
+            timeout: Timeout handed to ``session.post`` for each request.
+            telemetry_path: Flush route; takes precedence over ``endpoint``.
+            session: Session to send with; a new ``requests.Session`` if omitted.
+        """
+        self.server_url = server_url.rstrip("/")
+        self.api_key = api_key
+        self.client_id = client_id or str(uuid.uuid4())
+        self.timeout = timeout
+        self.telemetry_path = telemetry_path or endpoint or DEFAULT_TELEMETRY_PATH
+        self.session = session or requests.Session()
+        # Batches whose delivery failed, oldest first; capped by _retain_failed_batch.
+        self._failed_batches: List[List[RequestDetails]] = []
+    @property
+    def flush_url(self) -> str:
+        """Full backend route used for telemetry flushes."""
+        return f"{self.server_url}/{self.telemetry_path.lstrip('/')}"
+    def flush_batch(self, batch: List[RequestDetails]) -> None:
+        """POST a flushed request-details batch to the backend.
+
+        Previously failed batches are sent first, then ``batch``. A batch whose
+        first attempt looks like it never reached the server is retried once
+        immediately. Anything still failing is logged and retained; send errors
+        are not propagated to the caller. An empty ``batch`` is a no-op and does
+        not trigger a re-send of retained batches.
+        """
+        if not batch:
+            return
+        pending_batches = [*self._failed_batches, batch]
+        # Start from an empty retry list; failures below re-populate it in order.
+        self._failed_batches = []
+        for pending_batch in pending_batches:
+            try:
+                self._post_batch(pending_batch)
+            except Exception as exc:
+                if self._looks_not_received(exc):
+                    try:
+                        self._post_batch(pending_batch)
+                        continue
+                    except Exception as retry_exc:
+                        # Report the retry's error, not the first attempt's.
+                        exc = retry_exc
+                logger.warning("Telemetry flush failed; retaining batch: %s", exc)
+                self._retain_failed_batch(pending_batch)
+    def _post_batch(self, batch: List[RequestDetails]) -> None:
+        """Send one batch as JSON; raises on transport errors and 4xx/5xx replies."""
+        payload = {
+            "client_id": self.client_id,
+            "batch": [request.to_dict() for request in batch],
+        }
+        response = self.session.post(
+            self.flush_url,
+            json=payload,
+            headers=self._headers(),
+            timeout=self.timeout,
+        )
+        response.raise_for_status()
+    def _headers(self) -> dict:
+        """Build request headers; ``Authorization`` is added only when a key is set."""
+        headers = {
+            "Content-Type": "application/json",
+            "X-Client-ID": self.client_id,
+        }
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        return headers
+    def _retain_failed_batch(self, batch: List[RequestDetails]) -> None:
+        """Queue ``batch`` for a later re-send, keeping only the newest batches.
+
+        When the cap is exceeded the oldest batches are discarded; that loss is
+        logged so dropped telemetry does not go unnoticed.
+        """
+        self._failed_batches.append(batch)
+        overflow = len(self._failed_batches) - MAX_FAILED_BATCHES
+        if overflow > 0:
+            dropped_requests = sum(len(old) for old in self._failed_batches[:overflow])
+            self._failed_batches = self._failed_batches[-MAX_FAILED_BATCHES:]
+            logger.warning(
+                "Telemetry retry buffer full; dropping %d oldest batch(es) (%d request(s))",
+                overflow,
+                dropped_requests,
+            )
+    def _looks_not_received(self, exc: Exception) -> bool:
+        """Return True for connection errors or messages containing 'not received'."""
+        message = str(exc).lower()
+        return isinstance(exc, requests.ConnectionError) or "not received" in message
+    def close(self) -> None:
+        """Close the underlying HTTP session."""
+        self.session.close()

llmcost/auth/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Auth helpers for API-key based SDK access."""

llmcost/auth/config.py ADDED Viewed

@@ -0,0 +1,24 @@
+# Auth configuration helpers for API-key only setup.
+from __future__ import annotations
+import os
+class ConfigError(RuntimeError):
+    """Signals that a required auth setting is absent or malformed."""
+def get_api_key() -> str:
+    """Read CA_API_KEY from the environment and return it without surrounding whitespace.
+
+    Raises:
+        ConfigError: the variable is unset, or blank once stripped.
+    """
+    try:
+        raw_value = os.environ["CA_API_KEY"]
+    except KeyError as exc:
+        raise ConfigError("CA_API_KEY is required in the environment") from exc
+    stripped = raw_value.strip()
+    if stripped:
+        return stripped
+    raise ConfigError("CA_API_KEY cannot be empty")

llmcost/client.py ADDED Viewed

@@ -0,0 +1,208 @@
+#Client-side analytics API client with lazy API-key authentication.
+from __future__ import annotations
+import os
+import time
+import uuid
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, Optional
+import requests
+from .api.routes import AUTH_VERIFY_PATH, CUSTOM_PRICING_PATH
+from .auth.config import ConfigError, get_api_key
+# Base URL used when neither the base_url argument nor CA_API_BASE_URL is provided.
+# NOTE(review): this is plain HTTP and the API key is sent to it as a Bearer
+# header; non-local deployments presumably need an https URL -- confirm.
+DEFAULT_BASE_URL = "http://localhost:8000"
+# Default route for key verification; alias of AUTH_VERIFY_PATH.
+DEFAULT_AUTH_PATH = AUTH_VERIFY_PATH
+@dataclass(frozen=True)
+class AuthContext:
+	"""Immutable identity cached by CostAnalyticsClient after key verification."""
+	# Taken from the "user_id" field of the verify response; sent as X-CA-User-Id.
+	user_id: str
+	# Taken from the "api_key_id" field of the verify response; sent as X-CA-Key-Id.
+	api_key_id: str
+class AuthenticationError(RuntimeError):
+	"""Raised when API-key authentication fails or is missing.
+
+	CostAnalyticsClient raises it when no key can be loaded, the key lacks the
+	expected prefix, the server answers 401/403, or the verify response has no
+	identity fields.
+	"""
+class CostAnalyticsClient:
+	"""HTTP client that lazily validates an SDK API key on first use.
+
+	Construction performs no network I/O. The first call that needs request
+	headers verifies the key against ``auth_path`` and caches the returned
+	identity for the lifetime of the instance.
+	"""
+	def __init__(
+		self,
+		api_key: Optional[str] = None,
+		base_url: Optional[str] = None,
+		timeout: float = 30.0,
+		session: Optional[requests.Session] = None,
+		auth_path: str = DEFAULT_AUTH_PATH,
+		max_retries: int = 3,
+		backoff_factor: float = 0.5,
+		request_id_factory: Optional[Callable[[], str]] = None,
+	) -> None:
+		"""Initialize the client without forcing network validation.
+
+		Args:
+			api_key: SDK key; read from CA_API_KEY via ``get_api_key`` when omitted.
+			base_url: Backend base URL; falls back to CA_API_BASE_URL, then
+				``DEFAULT_BASE_URL``. Trailing slashes are stripped.
+			timeout: Timeout handed to the session for every request.
+			session: Session to send with; a new ``requests.Session`` if omitted.
+			auth_path: Route used to verify the key.
+			max_retries: Total number of attempts per request (at least 1).
+			backoff_factor: Base delay; attempt ``n`` waits ``backoff_factor * 2**n``.
+			request_id_factory: Produces X-Request-Id values; UUID4 strings by default.
+
+		Raises:
+			AuthenticationError: no key was passed and CA_API_KEY is missing or empty.
+		"""
+		try:
+			self.api_key = api_key or get_api_key()
+		except ConfigError as exc:
+			raise AuthenticationError(str(exc)) from exc
+		# Chained `or` instead of a getenv default: an empty CA_API_BASE_URL must
+		# also fall back, otherwise every URL would lack a scheme and host.
+		self.base_url = (base_url or os.getenv("CA_API_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
+		self.timeout = timeout
+		self.session = session or requests.Session()
+		self.auth_path = auth_path
+		self.max_retries = max(1, max_retries)
+		self.backoff_factor = backoff_factor
+		self._auth_context: Optional[AuthContext] = None
+		self._authenticated = False
+		self._request_id_factory = request_id_factory or (lambda: str(uuid.uuid4()))
+	def _url(self, path: str) -> str:
+		"""Join ``path`` onto the base URL with exactly one separating slash."""
+		return f"{self.base_url}/{path.lstrip('/')}"
+	def _auth_error(self, response: requests.Response) -> AuthenticationError:
+		"""Build the exception for a 401/403 reply from its JSON ``error`` field."""
+		# `or` also covers a present-but-null/empty "error" value.
+		return AuthenticationError(self._safe_json(response).get("error") or "invalid_api_key")
+	def _ensure_authenticated(self) -> AuthContext:
+		"""Validate the API key only once, on the first real request.
+
+		Returns:
+			The cached identity, verifying the key with a GET first if needed.
+
+		Raises:
+			AuthenticationError: the key is missing or malformed, the server
+				answers 401/403, or the reply lacks ``user_id``/``api_key_id``.
+			requests.HTTPError: any other 4xx/5xx reply from the verify route.
+		"""
+		if self._authenticated and self._auth_context is not None:
+			return self._auth_context
+		if not self.api_key:
+			raise AuthenticationError("CA_API_KEY is required")
+		if not self.api_key.startswith("ca_live_"):
+			raise AuthenticationError("Invalid API key format")
+		# Same slash handling as request(), so auth_path works with or without "/".
+		response = self.session.get(
+			self._url(self.auth_path),
+			headers={"Authorization": f"Bearer {self.api_key}"},
+			timeout=self.timeout,
+		)
+		if response.status_code in (401, 403):
+			# Never retry auth failures; they are not transient.
+			raise self._auth_error(response)
+		response.raise_for_status()
+		payload = self._safe_json(response)
+		user_id = payload.get("user_id")
+		api_key_id = payload.get("api_key_id")
+		if not user_id or not api_key_id:
+			raise AuthenticationError("Authentication response missing identity fields")
+		self._auth_context = AuthContext(user_id=str(user_id), api_key_id=str(api_key_id))
+		self._authenticated = True
+		return self._auth_context
+	def _safe_json(self, response: requests.Response) -> Dict[str, Any]:
+		"""Parse JSON without leaking raw response bodies into exception traces.
+
+		Returns an empty dict when the body is not JSON or is not a JSON object.
+		"""
+		try:
+			data = response.json()
+			return data if isinstance(data, dict) else {}
+		except ValueError:
+			return {}
+	def _request_headers(
+		self,
+		*,
+		provider: Optional[str] = None,
+		model: Optional[str] = None,
+		request_id: Optional[str] = None,
+	) -> Dict[str, str]:
+		"""Attach request metadata to every analytics call.
+
+		Triggers key verification if it has not happened yet. Missing
+		``provider``/``model`` are sent as empty strings; a missing
+		``request_id`` is generated by the request-id factory.
+		"""
+		auth_context = self._ensure_authenticated()
+		return {
+			"Authorization": f"Bearer {self.api_key}",
+			"X-CA-Key-Id": auth_context.api_key_id,
+			"X-CA-User-Id": auth_context.user_id,
+			"X-Request-Id": request_id or self._request_id_factory(),
+			"X-CA-Provider": provider or "",
+			"X-CA-Model": model or "",
+		}
+	def request(
+		self,
+		method: str,
+		path: str,
+		*,
+		json: Optional[Dict[str, Any]] = None,
+		params: Optional[Dict[str, Any]] = None,
+		provider: Optional[str] = None,
+		model: Optional[str] = None,
+		request_id: Optional[str] = None,
+	) -> requests.Response:
+		"""Send an authenticated request with 5xx retry only.
+
+		Returns:
+			The first response with a status below 500 (other than 401/403);
+			4xx replies are returned to the caller, not raised.
+
+		Raises:
+			AuthenticationError: the server answers 401 or 403.
+			requests.HTTPError: the last allowed attempt still returned 5xx.
+		"""
+		url = self._url(path)
+		# Built once, so every retry carries the same X-Request-Id.
+		headers = self._request_headers(provider=provider, model=model, request_id=request_id)
+		for attempt in range(self.max_retries):
+			response = self.session.request(
+				method=method,
+				url=url,
+				headers=headers,
+				json=json,
+				params=params,
+				timeout=self.timeout,
+			)
+			if response.status_code in (401, 403):
+				# Authentication and authorization errors must fail fast.
+				raise self._auth_error(response)
+			if response.status_code < 500:
+				return response
+			if attempt == self.max_retries - 1:
+				response.raise_for_status()
+			# Exponential backoff before the next attempt.
+			time.sleep(self.backoff_factor * (2**attempt))
+		raise RuntimeError("Request retry loop exited unexpectedly")
+	def submit_custom_pricing(
+		self,
+		*,
+		model: str,
+		provider: str,
+		input_cost_per_1m_tokens: float,
+		output_cost_per_1m_tokens: float,
+		cache_creation_cost_per_1m_tokens: Optional[float] = None,
+		cache_read_cost_per_1m_tokens: Optional[float] = None,
+		source: Optional[str] = None,
+		currency: str = "USD",
+		path: str = CUSTOM_PRICING_PATH,
+	) -> requests.Response:
+		"""Send client-supplied pricing data to the server for this account.
+
+		Optional fields are included in the JSON body only when they are not
+		``None``. The request is a POST to ``path`` sent through :meth:`request`,
+		so its return value and exceptions are those of :meth:`request`.
+		"""
+		payload: Dict[str, Any] = {
+			"model": model,
+			"provider": provider,
+			"input_cost_per_1m_tokens": input_cost_per_1m_tokens,
+			"output_cost_per_1m_tokens": output_cost_per_1m_tokens,
+			"currency": currency,
+		}
+		optional_fields = {
+			"cache_creation_cost_per_1m_tokens": cache_creation_cost_per_1m_tokens,
+			"cache_read_cost_per_1m_tokens": cache_read_cost_per_1m_tokens,
+			"source": source,
+		}
+		payload.update({key: value for key, value in optional_fields.items() if value is not None})
+		return self.request(
+			"POST",
+			path,
+			json=payload,
+			provider=provider,
+			model=model,
+		)
+	def close(self) -> None:
+		"""Close the underlying HTTP session."""
+		self.session.close()

llmcost/constants.py ADDED Viewed

File without changes

llmcost/errors.py ADDED Viewed

File without changes

llmcost/middleware/__init__.py ADDED Viewed

File without changes

llmcost/middleware/error_handler.py ADDED Viewed

File without changes

llmcost/middleware/rate_limit.py ADDED Viewed

@@ -0,0 +1,33 @@
+# llmcost/middleware/rate_limit.py
+import time
+import threading
+class TokenBucket:
+    """Token-bucket rate limiter; ``acquire`` is serialized with a lock."""
+    def __init__(self, capacity: int, refill_rate: float):
+        """
+        capacity    : max burst allowed (e.g. 5 flushes)
+        refill_rate : tokens added per second (e.g. 0.1 = 1 token per 10s)
+
+        Raises ValueError if either value is negative.
+        """
+        # A negative value would yield a bucket that silently never grants
+        # (or drains below zero), so reject it up front.
+        if capacity < 0:
+            raise ValueError("capacity must be non-negative")
+        if refill_rate < 0:
+            raise ValueError("refill_rate must be non-negative")
+        self.capacity = capacity
+        self.tokens = capacity       # start full
+        self.refill_rate = refill_rate
+        # Monotonic clock, so wall-clock adjustments cannot affect refills.
+        self._last = time.monotonic()
+        self._lock = threading.Lock()
+    def acquire(self) -> bool:
+        """Take one token if available; return True on success, False otherwise.
+
+        Never blocks waiting for a refill.
+        """
+        with self._lock:
+            now = time.monotonic()
+            elapsed = now - self._last
+            # refill proportional to time passed
+            self.tokens = min(
+                self.capacity,
+                self.tokens + elapsed * self.refill_rate
+            )
+            self._last = now
+            if self.tokens >= 1:
+                self.tokens -= 1
+                return True
+            return False

llmcost/plugins/__init__.py ADDED Viewed

File without changes

llmcost/plugins/base_plugin.py ADDED Viewed

File without changes

llmcost/pricing/__init__.py ADDED Viewed

@@ -0,0 +1,41 @@
+#Cost analytics and pricing module.
+from .extractors import (
+    UsageBreakdown,
+    UsageExtractor,
+    Extractor,
+    get_extractor,
+)
+from .aggregator import (
+    RequestDetailsBuffer,
+    RequestDetails,
+    get_request_buffer,
+    get_cost_aggregator,
+    FLUSH_BATCH_SIZE,
+    FLUSH_INTERVAL_SECONDS,
+)
+from .interceptor import (
+    CostInterceptor,
+    wrap_custom_client,
+)
+# Alias: CostExtractor refers to the same object as UsageExtractor
+# (presumably kept so the older name keeps working -- confirm).
+CostExtractor = UsageExtractor
+# Public API of the pricing sub-package, grouped by the module each name comes from.
+__all__ = [
+    # Extractors
+    "CostExtractor",
+    "UsageBreakdown",
+    "UsageExtractor",
+    "Extractor",
+    "get_extractor",
+    # Buffer (replaces aggregator)
+    "RequestDetailsBuffer",
+    "RequestDetails",
+    "get_request_buffer",
+    "get_cost_aggregator",
+    "FLUSH_BATCH_SIZE",
+    "FLUSH_INTERVAL_SECONDS",
+    # Interceptor
+    "CostInterceptor",
+    "wrap_custom_client",
+]