PyPI - eval-hub-sdk - Versions diffs - 0.1.0a0__py3-none-any.whl - Mend

eval-hub-sdk 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

eval_hub_sdk-0.1.0a0.dist-info/METADATA +711 -0
eval_hub_sdk-0.1.0a0.dist-info/RECORD +27 -0
eval_hub_sdk-0.1.0a0.dist-info/WHEEL +5 -0
eval_hub_sdk-0.1.0a0.dist-info/entry_points.txt +2 -0
eval_hub_sdk-0.1.0a0.dist-info/licenses/LICENSE +201 -0
eval_hub_sdk-0.1.0a0.dist-info/top_level.txt +1 -0
evalhub/__init__.py +84 -0
evalhub/adapter/__init__.py +28 -0
evalhub/adapter/api/__init__.py +6 -0
evalhub/adapter/api/endpoints.py +342 -0
evalhub/adapter/api/router.py +135 -0
evalhub/adapter/cli.py +331 -0
evalhub/adapter/client/__init__.py +6 -0
evalhub/adapter/client/adapter_client.py +418 -0
evalhub/adapter/client/discovery.py +275 -0
evalhub/adapter/models/__init__.py +9 -0
evalhub/adapter/models/framework.py +404 -0
evalhub/adapter/oci/__init__.py +5 -0
evalhub/adapter/oci/persister.py +76 -0
evalhub/adapter/server/__init__.py +5 -0
evalhub/adapter/server/app.py +157 -0
evalhub/cli.py +331 -0
evalhub/models/__init__.py +32 -0
evalhub/models/api.py +388 -0
evalhub/py.typed +0 -0
evalhub/utils/__init__.py +5 -0
evalhub/utils/logging.py +41 -0

evalhub/adapter/client/adapter_client.py ADDED Viewed

@@ -0,0 +1,418 @@
+"""Client for communicating with framework adapters via the standard SDK API."""
+import logging
+from collections.abc import AsyncGenerator
+from typing import Any
+# Only Any is imported from typing; optional/union types use PEP 604 syntax (X | None)
+import httpx
+from ...models.api import (
+    BenchmarkInfo,
+    EvaluationJob,
+    EvaluationRequest,
+    EvaluationResponse,
+    FrameworkInfo,
+    HealthResponse,
+    JobStatus,
+    OCICoordinate,
+    PersistResponse,
+)
+logger = logging.getLogger(__name__)
+class ClientError(Exception):
+    """Root exception type for errors raised by the adapter client.
+    Attributes:
+        cause: The underlying exception that led to this error, or None.
+    """
+    def __init__(self, message: str, cause: Exception | None = None) -> None:
+        # Keep a reference to the triggering exception for callers to inspect.
+        self.cause = cause
+        Exception.__init__(self, message)
+class AdapterClient:
+    """Client for communicating with framework adapters.
+    This client provides a standardized way for EvalHub to communicate
+    with any framework adapter that implements the SDK API.
+    """
+    def __init__(self, base_url: str, timeout: float = 30.0, max_retries: int = 3):
+        """Set up the HTTP client used to talk to one framework adapter.
+        Args:
+            base_url: Base URL of the framework adapter (e.g., "http://adapter:8080");
+                trailing slashes are stripped
+            timeout: Request timeout in seconds
+            max_retries: Maximum number of retry attempts
+        """
+        self.max_retries = max_retries
+        self.base_url = base_url.rstrip("/")
+        # Every SDK endpoint lives under the versioned API prefix.
+        self.api_base = f"{self.base_url}/api/v1"
+        request_timeout = httpx.Timeout(timeout)
+        pool_limits = httpx.Limits(max_connections=20, max_keepalive_connections=5)
+        self._client = httpx.AsyncClient(timeout=request_timeout, limits=pool_limits)
+    async def close(self) -> None:
+        """Release the underlying httpx client and its connections."""
+        http_client = self._client
+        await http_client.aclose()
+    async def __aenter__(self) -> "AdapterClient":
+        """Enter ``async with``; the client itself is the context value."""
+        return self
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: Any,
+    ) -> None:
+        """Leave ``async with`` by closing the client.
+        Returns None, so an exception raised inside the block is not suppressed.
+        """
+        await self.close()
+    async def _request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
+        """Make an HTTP request to the adapter API, retrying transient failures.
+        Timeouts, other transport errors and 5xx responses are retried up to
+        ``max_retries`` times. 4xx responses are raised immediately. At least one
+        attempt is always made, even when ``max_retries`` is zero or negative.
+        Args:
+            method: HTTP method
+            path: API path (without base URL)
+            **kwargs: Additional arguments for httpx
+        Returns:
+            httpx.Response: Response object with a successful status code
+        Raises:
+            httpx.HTTPStatusError: On a 4xx response, or a 5xx response after retries
+            httpx.RequestError: On timeout/connection failure after retries
+        """
+        url = f"{self.api_base}{path}"
+        # Clamp so a negative max_retries still sends the request once instead of
+        # skipping the loop and failing with a misleading RuntimeError.
+        retries = max(self.max_retries, 0)
+        for attempt in range(retries + 1):
+            is_last_attempt = attempt == retries
+            try:
+                response = await self._client.request(method, url, **kwargs)
+                response.raise_for_status()
+                return response
+            except httpx.TimeoutException:
+                # Must precede RequestError: timeouts are a subclass of it.
+                if is_last_attempt:
+                    logger.error(
+                        "Request to %s timed out after %s retries", url, retries
+                    )
+                    raise
+                logger.warning(
+                    "Request to %s timed out, retrying (%s/%s)",
+                    url,
+                    attempt + 1,
+                    retries,
+                )
+            except httpx.HTTPStatusError as e:
+                # Don't retry client errors (4xx), only server errors (5xx)
+                if e.response.status_code < 500 or is_last_attempt:
+                    raise
+                logger.warning(
+                    "Server error %s for %s, retrying (%s/%s)",
+                    e.response.status_code,
+                    url,
+                    attempt + 1,
+                    retries,
+                )
+            except httpx.RequestError as e:
+                if is_last_attempt:
+                    logger.error(
+                        "Connection error to %s after %s retries: %s", url, retries, e
+                    )
+                    raise
+                logger.warning(
+                    "Connection error to %s, retrying (%s/%s): %s",
+                    url,
+                    attempt + 1,
+                    retries,
+                    e,
+                )
+        # Unreachable: the final attempt either returns or re-raises. Kept so
+        # type checkers see every code path terminate.
+        raise RuntimeError("Request retry loop completed without returning")
+    # Health and Info endpoints
+    async def health_check(self) -> HealthResponse:
+        """Query the adapter's ``/health`` endpoint.
+        Returns:
+            HealthResponse: Health status parsed from the JSON response body
+        Raises:
+            httpx.HTTPError: If health check fails
+        """
+        payload = (await self._request("GET", "/health")).json()
+        return HealthResponse(**payload)
+    async def get_framework_info(self) -> FrameworkInfo:
+        """Query the adapter's ``/info`` endpoint.
+        Returns:
+            FrameworkInfo: Framework metadata parsed from the JSON response body
+        Raises:
+            httpx.HTTPError: If request fails
+        """
+        payload = (await self._request("GET", "/info")).json()
+        return FrameworkInfo(**payload)
+    # Benchmark endpoints
+    async def list_benchmarks(self) -> list[BenchmarkInfo]:
+        """Fetch every benchmark the adapter exposes via ``/benchmarks``.
+        Returns:
+            list[BenchmarkInfo]: One entry per item in the JSON response
+        Raises:
+            httpx.HTTPError: If request fails
+        """
+        response = await self._request("GET", "/benchmarks")
+        benchmarks: list[BenchmarkInfo] = []
+        for entry in response.json():
+            benchmarks.append(BenchmarkInfo(**entry))
+        return benchmarks
+    async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo:
+        """Fetch a single benchmark from ``/benchmarks/{benchmark_id}``.
+        Args:
+            benchmark_id: The benchmark identifier (inserted into the path as-is)
+        Returns:
+            BenchmarkInfo: Benchmark information
+        Raises:
+            httpx.HTTPError: If benchmark not found or request fails
+        """
+        path = f"/benchmarks/{benchmark_id}"
+        payload = (await self._request("GET", path)).json()
+        return BenchmarkInfo(**payload)
+    # Evaluation endpoints
+    async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
+        """Submit an evaluation job via ``POST /evaluations``.
+        Args:
+            request: The evaluation request
+        Returns:
+            EvaluationJob: The submitted job
+        Raises:
+            httpx.HTTPError: If request fails or is invalid
+        """
+        # mode="json" converts values such as datetimes and enums to JSON-native
+        # types, so httpx can always encode the payload passed via json=.
+        payload = request.model_dump(mode="json")
+        response = await self._request("POST", "/evaluations", json=payload)
+        return EvaluationJob(**response.json())
+    async def get_job_status(self, job_id: str) -> EvaluationJob:
+        """Fetch the current state of a job from ``/evaluations/{job_id}``.
+        Args:
+            job_id: The job identifier
+        Returns:
+            EvaluationJob: Current job status
+        Raises:
+            httpx.HTTPError: If job not found or request fails
+        """
+        path = f"/evaluations/{job_id}"
+        payload = (await self._request("GET", path)).json()
+        return EvaluationJob(**payload)
+    async def get_evaluation_results(self, job_id: str) -> EvaluationResponse:
+        """Fetch results from ``/evaluations/{job_id}/results``.
+        Args:
+            job_id: The job identifier
+        Returns:
+            EvaluationResponse: Evaluation results
+        Raises:
+            httpx.HTTPError: If results not available or request fails
+        """
+        path = f"/evaluations/{job_id}/results"
+        payload = (await self._request("GET", path)).json()
+        return EvaluationResponse(**payload)
+    async def cancel_job(self, job_id: str) -> bool:
+        """Request cancellation via ``DELETE /evaluations/{job_id}``.
+        Args:
+            job_id: The job identifier
+        Returns:
+            bool: True if the request succeeded; False on a 404 (job not found)
+                or 409 (job cannot be cancelled) response
+        Raises:
+            httpx.HTTPError: If request fails for any other reason
+        """
+        try:
+            await self._request("DELETE", f"/evaluations/{job_id}")
+        except httpx.HTTPStatusError as e:
+            # 404: job not found; 409: job cannot be cancelled
+            if e.response.status_code in (404, 409):
+                return False
+            raise
+        return True
+    async def list_jobs(
+        self, status: JobStatus | None = None, limit: int | None = None
+    ) -> list[EvaluationJob]:
+        """List evaluation jobs via ``GET /evaluations``.
+        Args:
+            status: Filter by job status; omitted from the query when None
+            limit: Maximum number of jobs to return; omitted from the query when None
+        Returns:
+            list[EvaluationJob]: List of jobs
+        Raises:
+            httpx.HTTPError: If request fails
+        """
+        params: dict[str, Any] = {}
+        # Compare against None rather than truthiness so an explicit falsy value
+        # (e.g. limit=0) is forwarded instead of being silently dropped.
+        if status is not None:
+            params["status"] = status.value
+        if limit is not None:
+            params["limit"] = str(limit)
+        response = await self._request("GET", "/evaluations", params=params)
+        return [EvaluationJob(**job) for job in response.json()]
+    async def stream_job_updates(
+        self, job_id: str
+    ) -> AsyncGenerator[EvaluationJob, None]:
+        """Stream real-time updates for an evaluation job.
+        Reads ``/evaluations/{job_id}/stream`` line by line and yields a job for
+        every non-empty ``data: `` line. Lines that cannot be parsed are logged
+        and skipped. On any ``httpx.HTTPError`` (before or during streaming) the
+        method falls back to polling.
+        Args:
+            job_id: The job identifier
+        Yields:
+            EvaluationJob: Updated job status
+        Raises:
+            httpx.HTTPError: If the polling fallback fails
+        """
+        import json
+        url = f"{self.api_base}/evaluations/{job_id}/stream"
+        try:
+            async with self._client.stream("GET", url) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if not line.startswith("data: "):
+                        continue
+                    data = line[6:]  # Remove "data: " prefix
+                    if not data.strip():
+                        continue
+                    try:
+                        job = EvaluationJob(**json.loads(data))
+                    except Exception as e:
+                        # Best effort: one malformed event must not end the stream.
+                        logger.warning(f"Failed to parse streaming data: {e}")
+                        continue
+                    # Yield outside the parse guard so an exception thrown into
+                    # the generator by the consumer is not mistaken for a parse
+                    # failure and swallowed.
+                    yield job
+        except httpx.HTTPError:
+            # Fall back to polling if streaming is not supported
+            logger.info(
+                f"Streaming not available for {job_id}, falling back to polling"
+            )
+            async for job_update in self._poll_job_updates(job_id):
+                yield job_update
+    async def _poll_job_updates(
+        self, job_id: str, interval: float = 2.0
+    ) -> AsyncGenerator[EvaluationJob, None]:
+        """Yield job snapshots by polling until the job reaches a final state.
+        Stops after yielding a COMPLETED, FAILED or CANCELLED job, or when the
+        adapter answers 404 for the job.
+        Args:
+            job_id: The job identifier
+            interval: Polling interval in seconds
+        Yields:
+            EvaluationJob: Updated job status
+        """
+        import asyncio
+        final_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
+        while True:
+            try:
+                snapshot = await self.get_job_status(job_id)
+                yield snapshot
+                if snapshot.status in final_states:
+                    return
+                await asyncio.sleep(interval)
+            except httpx.HTTPStatusError as e:
+                if e.response.status_code != 404:
+                    raise
+                return  # Job not found
+    async def wait_for_completion(
+        self, job_id: str, timeout: float | None = None, poll_interval: float = 5.0
+    ) -> EvaluationJob:
+        """Poll an evaluation job until it reaches a final state.
+        The job status is always fetched at least once. With a timeout, the wait
+        between polls is shortened so the deadline is not overshot.
+        Args:
+            job_id: The job identifier
+            timeout: Maximum time to wait in seconds; None waits indefinitely
+            poll_interval: Polling interval in seconds
+        Returns:
+            EvaluationJob: Final job status (COMPLETED, FAILED or CANCELLED)
+        Raises:
+            TimeoutError: If job doesn't complete within timeout
+            httpx.HTTPError: If request fails
+        """
+        import asyncio
+        import time
+        # Monotonic clock: unaffected by wall-clock adjustments while waiting.
+        # "is not None" so that timeout=0 means "check once", not "no timeout".
+        deadline = None if timeout is None else time.monotonic() + timeout
+        final_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
+        while True:
+            job = await self.get_job_status(job_id)
+            if job.status in final_states:
+                return job
+            delay = poll_interval
+            if deadline is not None:
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    raise TimeoutError(
+                        f"Job {job_id} did not complete within {timeout} seconds"
+                    )
+                # Never sleep past the deadline; one final poll happens after it.
+                delay = min(poll_interval, remaining)
+            await asyncio.sleep(delay)
+    async def persist_job_files(
+        self, job_id: str, coordinate: OCICoordinate
+    ) -> PersistResponse:
+        """Persist job files as OCI artifact via ``POST /evaluations/{job_id}/persist``.
+        Args:
+            job_id: The job identifier
+            coordinate: OCI coordinates (reference and optional subject)
+        Returns:
+            PersistResponse: Persistence status and artifact information
+        Raises:
+            httpx.HTTPError: If request fails
+            ClientError: If the adapter answers 404 (job not found or no files
+                to persist); the original error is available as ``cause`` and
+                as ``__cause__``
+        """
+        try:
+            response = await self._request(
+                "POST",
+                f"/evaluations/{job_id}/persist",
+                # mode="json" guarantees a payload httpx can JSON-encode.
+                json=coordinate.model_dump(mode="json"),
+            )
+            return PersistResponse(**response.json())
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 404:
+                # Chain explicitly so tracebacks show the HTTP error as the cause.
+                raise ClientError(
+                    f"Job {job_id} not found or has no files to persist", cause=e
+                ) from e
+            raise

evalhub/adapter/client/discovery.py ADDED Viewed

@@ -0,0 +1,275 @@
+"""Discovery service for finding and managing framework adapters."""
+import asyncio
+import logging
+from dataclasses import dataclass
+# No typing imports needed: optional/union types use PEP 604 syntax (X | None)
+from ...models.api import FrameworkInfo, HealthResponse
+from .adapter_client import AdapterClient
+logger = logging.getLogger(__name__)
+@dataclass
+class AdapterEndpoint:
+    """Information about a discovered adapter endpoint.
+    Instances are stored by AdapterDiscovery keyed by ``url`` and are updated
+    in place by health checks.
+    """
+    # Base URL of the adapter; also the registry key.
+    url: str
+    # Framework identifier; a placeholder "unknown_<n>" when registered without one.
+    framework_id: str
+    name: str
+    version: str
+    # Taken from the adapter's health response, or "unreachable" when a health
+    # check fails, or "unknown" for manually registered, not-yet-checked adapters.
+    status: str  # e.g. "healthy", "unhealthy", "unreachable", "unknown"
+    # Event-loop clock reading (not wall-clock time) of the last check, if any.
+    last_checked: float | None = None
+    framework_info: FrameworkInfo | None = None
+    health_info: HealthResponse | None = None
+class AdapterDiscovery:
+    """Service for discovering and managing framework adapter endpoints.
+    This helps EvalHub automatically discover available framework adapters
+    and route requests to the appropriate adapter.
+    """
+    def __init__(self) -> None:
+        """Create an empty registry with health monitoring switched off."""
+        # Background monitoring state; populated by start_health_monitoring().
+        self._running = False
+        self._health_check_task: asyncio.Task | None = None
+        self._check_interval = 30.0  # Health check interval in seconds
+        # Registered adapters keyed by base URL.
+        self._adapters: dict[str, AdapterEndpoint] = {}
+    def register_adapter(self, url: str, framework_id: str | None = None) -> None:
+        """Add a placeholder entry for an adapter without contacting it.
+        An existing entry for the same URL is replaced.
+        Args:
+            url: The adapter's base URL
+            framework_id: Optional framework ID; when omitted a placeholder
+                ``unknown_<n>`` is used, where n is the current registry size
+        """
+        placeholder_id = framework_id or f"unknown_{len(self._adapters)}"
+        self._adapters[url] = AdapterEndpoint(
+            url=url,
+            framework_id=placeholder_id,
+            name="Unknown",
+            version="unknown",
+            status="unknown",
+        )
+        logger.info(f"Registered adapter: {url}")
+    def unregister_adapter(self, url: str) -> bool:
+        """Remove the adapter registered under the given URL, if any.
+        Args:
+            url: The adapter's base URL
+        Returns:
+            bool: True if an entry was removed, False if the URL was not registered
+        """
+        if url not in self._adapters:
+            return False
+        del self._adapters[url]
+        logger.info(f"Unregistered adapter: {url}")
+        return True
+    async def discover_adapter(self, url: str) -> AdapterEndpoint | None:
+        """Discover information about an adapter at the given URL.
+        Opens a short-lived client (10 second timeout), fetches the framework
+        info and health status, and builds an endpoint record from them. The
+        record is returned but not added to the registry.
+        Args:
+            url: The adapter's base URL
+        Returns:
+            AdapterEndpoint: Adapter information, or None if any step fails
+        """
+        try:
+            async with AdapterClient(url, timeout=10.0) as client:
+                framework_info = await client.get_framework_info()
+                health_info = await client.health_check()
+                adapter = AdapterEndpoint(
+                    url=url,
+                    framework_id=framework_info.framework_id,
+                    name=framework_info.name,
+                    version=framework_info.version,
+                    status=health_info.status,
+                    # get_running_loop() is the preferred accessor inside a
+                    # coroutine; the value is the loop's monotonic clock.
+                    last_checked=asyncio.get_running_loop().time(),
+                    framework_info=framework_info,
+                    health_info=health_info,
+                )
+                logger.info(
+                    f"Discovered adapter: {framework_info.name} "
+                    f"({framework_info.framework_id}) at {url}"
+                )
+                return adapter
+        except Exception as e:
+            # Discovery is best effort: any failure means "not available".
+            logger.warning(f"Failed to discover adapter at {url}: {e}")
+            return None
+    async def check_adapter_health(self, adapter: AdapterEndpoint) -> AdapterEndpoint:
+        """Check the health of a specific adapter and update it in place.
+        Uses a short-lived client with a 5 second timeout. On success the
+        adapter's status and health info are replaced; on any failure the
+        status becomes "unreachable" and the previous health info is kept.
+        Args:
+            adapter: The adapter to check
+        Returns:
+            AdapterEndpoint: The same adapter object, updated
+        """
+        # get_running_loop() is the preferred accessor inside a coroutine.
+        loop = asyncio.get_running_loop()
+        try:
+            async with AdapterClient(adapter.url, timeout=5.0) as client:
+                health_info = await client.health_check()
+                adapter.status = health_info.status
+                adapter.health_info = health_info
+                adapter.last_checked = loop.time()
+                logger.debug(
+                    f"Health check: {adapter.framework_id} is {adapter.status}"
+                )
+        except Exception as e:
+            # Best effort: a failed probe marks the adapter instead of raising.
+            adapter.status = "unreachable"
+            adapter.last_checked = loop.time()
+            logger.warning(f"Health check failed for {adapter.framework_id}: {e}")
+        return adapter
+    async def refresh_all_adapters(self) -> None:
+        """Run a health check on every registered adapter concurrently."""
+        if not self._adapters:
+            logger.debug("No adapters registered for health check")
+            return
+        logger.debug(f"Checking health of {len(self._adapters)} adapters")
+        # One task per adapter so the checks overlap.
+        pending = [
+            asyncio.create_task(self.check_adapter_health(endpoint))
+            for endpoint in self._adapters.values()
+        ]
+        # return_exceptions=True: one failing task does not abort the others.
+        await asyncio.gather(*pending, return_exceptions=True)
+        healthy_count = sum(
+            1 for endpoint in self._adapters.values() if endpoint.status == "healthy"
+        )
+        logger.info(
+            f"Health check complete: {healthy_count}/{len(self._adapters)} adapters healthy"
+        )
+    async def auto_discover_from_config(self, config: dict[str, str]) -> None:
+        """Discover and register each adapter listed in the configuration.
+        Adapters are probed one after another. An adapter that cannot be
+        discovered is still registered as a placeholder under its configured ID.
+        Args:
+            config: Dictionary mapping framework_id to URL
+        """
+        for framework_id, url in config.items():
+            logger.info(f"Discovering adapter for {framework_id} at {url}")
+            discovered = await self.discover_adapter(url)
+            if not discovered:
+                # Still register even if discovery fails
+                self.register_adapter(url, framework_id)
+                continue
+            self._adapters[url] = discovered
+    def get_adapters(
+        self, status: str | None = None, framework_id: str | None = None
+    ) -> list[AdapterEndpoint]:
+        """Return registered adapters, optionally narrowed by status and framework.
+        A filter that is None or empty is not applied.
+        Args:
+            status: Filter by status ("healthy", "unhealthy", "unreachable")
+            framework_id: Filter by framework ID
+        Returns:
+            list[AdapterEndpoint]: Matching adapters, in registration order
+        """
+        return [
+            endpoint
+            for endpoint in self._adapters.values()
+            if (not status or endpoint.status == status)
+            and (not framework_id or endpoint.framework_id == framework_id)
+        ]
+    def get_adapter_for_framework(self, framework_id: str) -> AdapterEndpoint | None:
+        """Return the first registered adapter for a framework whose status is "healthy".
+        Args:
+            framework_id: The framework identifier
+        Returns:
+            AdapterEndpoint: Healthy adapter, or None if not available
+        """
+        candidates = (
+            endpoint
+            for endpoint in self._adapters.values()
+            if endpoint.framework_id == framework_id and endpoint.status == "healthy"
+        )
+        return next(candidates, None)
+    def get_healthy_adapters(self) -> list[AdapterEndpoint]:
+        """Return every registered adapter whose status is "healthy".
+        Returns:
+            list[AdapterEndpoint]: All healthy adapters
+        """
+        healthy: list[AdapterEndpoint] = []
+        for endpoint in self._adapters.values():
+            if endpoint.status == "healthy":
+                healthy.append(endpoint)
+        return healthy
+    async def start_health_monitoring(self, interval: float | None = None) -> None:
+        """Launch the background health-check loop unless it is already running.
+        Args:
+            interval: Health check interval in seconds; when None or zero the
+                currently configured interval is kept
+        """
+        if self._running:
+            logger.warning("Health monitoring is already running")
+            return
+        # A falsy interval leaves the existing setting untouched.
+        self._check_interval = interval or self._check_interval
+        self._running = True
+        monitor = asyncio.create_task(self._health_monitor_loop())
+        self._health_check_task = monitor
+        logger.info(f"Started health monitoring (interval: {self._check_interval}s)")
+    async def stop_health_monitoring(self) -> None:
+        """Cancel the background health-check task and wait for it to finish."""
+        self._running = False
+        monitor = self._health_check_task
+        if monitor:
+            monitor.cancel()
+            try:
+                await monitor
+            except asyncio.CancelledError:
+                # Expected result of cancelling the task.
+                pass
+            self._health_check_task = None
+        logger.info("Stopped health monitoring")
+    async def _health_monitor_loop(self) -> None:
+        """Refresh all adapters repeatedly until monitoring is stopped.
+        Exits quietly on cancellation. Any other error is logged and the loop
+        resumes after a 5 second pause.
+        """
+        while self._running:
+            try:
+                await self.refresh_all_adapters()
+                await asyncio.sleep(self._check_interval)
+            except asyncio.CancelledError:
+                return
+            except Exception as e:
+                logger.exception(f"Error in health monitoring loop: {e}")
+                await asyncio.sleep(5.0)  # Short delay before retrying
+    async def shutdown(self) -> None:
+        """Stop health monitoring, then forget every registered adapter."""
+        await self.stop_health_monitoring()
+        registry = self._adapters
+        registry.clear()
+        logger.info("Discovery service shut down")

evalhub/adapter/models/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Adapter-specific models and base classes."""
+from .framework import AdapterConfig, AdapterMetadata, FrameworkAdapter
+# Public names re-exported from the adapter models package.
+__all__ = ["AdapterConfig", "AdapterMetadata", "FrameworkAdapter"]