eval-hub-sdk 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
+"""Framework adapter models and base classes."""
+
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from ...models.api import (
+    BenchmarkInfo,
+    EvaluationJob,
+    EvaluationJobFilesLocation,
+    EvaluationRequest,
+    EvaluationResponse,
+    FrameworkInfo,
+    HealthResponse,
+    JobStatus,
+    OCICoordinate,
+    PersistResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class AdapterConfig(BaseModel):
+    """Base configuration for framework adapters."""
+
+    # Adapter identification
+    framework_id: str = Field(..., description="Unique framework identifier")
+    adapter_name: str = Field(..., description="Adapter display name")
+    version: str = Field(default="1.0.0", description="Adapter version")
+
+    # Server configuration
+    host: str = Field(default="0.0.0.0", description="Server host to bind to")
+    port: int = Field(default=8000, description="Server port")
+    workers: int = Field(default=1, description="Number of worker processes")
+
+    # Framework-specific settings
+    framework_config: dict[str, Any] = Field(
+        default_factory=dict, description="Framework-specific configuration"
+    )
+
+    # Resource limits
+    max_concurrent_jobs: int = Field(
+        default=10, description="Maximum concurrent evaluation jobs"
+    )
+    job_timeout_seconds: int = Field(
+        default=3600, description="Maximum job execution time"
+    )
+    memory_limit_gb: float | None = Field(
+        default=None, description="Memory limit in GB"
+    )
+
+    # Logging and monitoring
+    log_level: str = Field(default="INFO", description="Logging level")
+    enable_metrics: bool = Field(default=True, description="Enable metrics collection")
+    health_check_interval: int = Field(
+        default=30, description="Health check interval in seconds"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        extra = "allow"
+
+
+class AdapterMetadata(BaseModel):
+    """Metadata for framework adapters."""
+
+    # Basic information
+    name: str = Field(..., description="Adapter name")
+    description: str | None = Field(None, description="Adapter description")
+    version: str = Field(..., description="Adapter version")
+    author: str | None = Field(None, description="Adapter author")
+
+    # Framework information
+    framework_name: str = Field(..., description="Name of the wrapped framework")
+    framework_version: str = Field(..., description="Version of the wrapped framework")
+    framework_url: str | None = Field(
+        None, description="Framework documentation/repository URL"
+    )
+
+    # Capabilities
+    supported_model_types: list[str] = Field(
+        default_factory=list, description="Supported model types"
+    )
+    supported_metrics: list[str] = Field(
+        default_factory=list, description="Supported evaluation metrics"
+    )
+    supports_batch_evaluation: bool = Field(
+        True, description="Supports batch evaluation"
+    )
+    supports_few_shot: bool = Field(True, description="Supports few-shot evaluation")
+    supports_custom_datasets: bool = Field(
+        False, description="Supports custom datasets"
+    )
+
+    # Resource requirements
+    min_memory_gb: float | None = Field(None, description="Minimum memory requirement")
+    requires_gpu: bool = Field(False, description="Requires GPU")
+    max_batch_size: int | None = Field(None, description="Maximum batch size")
+
+    # Contact and documentation
+    contact_email: str | None = Field(None, description="Contact email")
+    documentation_url: str | None = Field(None, description="Documentation URL")
+    repository_url: str | None = Field(None, description="Source repository URL")
+    license: str | None = Field(None, description="License information")
+
+
+class FrameworkAdapter(ABC):
+    """Abstract base class for framework adapters.
+
+    This class defines the interface that all framework adapters must implement
+    to integrate with EvalHub via the SDK.
+    """
+
+    def __init__(self, config: AdapterConfig):
+        """Initialize the adapter with configuration."""
+        self.config = config
+        self._jobs: dict[str, EvaluationJob] = {}
+        self._shutdown_event = asyncio.Event()
+
+    @abstractmethod
+    async def initialize(self) -> None:
+        """Initialize the framework adapter.
+
+        This method should:
+        - Load the underlying evaluation framework
+        - Verify dependencies are available
+        - Set up any required resources
+        - Prepare for evaluation requests
+
+        Raises:
+            Exception: If initialization fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_framework_info(self) -> FrameworkInfo:
+        """Get information about this framework adapter.
+
+        Returns:
+            FrameworkInfo: Metadata about the framework and its capabilities
+        """
+        pass
+
+    @abstractmethod
+    async def list_benchmarks(self) -> list[BenchmarkInfo]:
+        """List all available benchmarks.
+
+        Returns:
+            List[BenchmarkInfo]: Available benchmarks with their metadata
+        """
+        pass
+
+    @abstractmethod
+    async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo | None:
+        """Get detailed information about a specific benchmark.
+
+        Args:
+            benchmark_id: The benchmark identifier
+
+        Returns:
+            BenchmarkInfo: Benchmark information, or None if not found
+        """
+        pass
+
+    @abstractmethod
+    async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
+        """Submit an evaluation job.
+
+        Args:
+            request: The evaluation request
+
+        Returns:
+            EvaluationJob: The created job with initial status
+
+        Raises:
+            ValueError: If request is invalid
+            RuntimeError: If unable to submit job
+        """
+        pass
+
+    @abstractmethod
+    async def get_job_status(self, job_id: str) -> EvaluationJob | None:
+        """Get the current status of an evaluation job.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationJob: Current job status, or None if not found
+        """
+        pass
+
+    @abstractmethod
+    async def get_evaluation_results(self, job_id: str) -> EvaluationResponse | None:
+        """Get the results of a completed evaluation.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationResponse: Evaluation results, or None if not available
+        """
+        pass
+
+    @abstractmethod
+    async def cancel_job(self, job_id: str) -> bool:
+        """Cancel a running evaluation job.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            bool: True if job was cancelled, False if not found or already completed
+        """
+        pass
+
+    @abstractmethod
+    async def health_check(self) -> HealthResponse:
+        """Perform a health check of the framework adapter.
+
+        Returns:
+            HealthResponse: Current health status and resource information
+        """
+        pass
+
+    @abstractmethod
+    async def shutdown(self) -> None:
+        """Gracefully shut down the framework adapter.
+
+        This method should:
+        - Cancel any running jobs
+        - Clean up resources
+        - Save any necessary state
+        """
+        pass
+
+    # Optional methods with default implementations
+
+    async def stream_job_updates(
+        self, job_id: str
+    ) -> AsyncGenerator[EvaluationJob, None]:
+        """Stream real-time updates for a job.
+
+        The default implementation polls get_job_status. Framework adapters
+        can override this to provide true streaming updates.
+
+        Args:
+            job_id: The job identifier
+
+        Yields:
+            EvaluationJob: Updated job status
+        """
+        while not self._shutdown_event.is_set():
+            job = await self.get_job_status(job_id)
+            if not job:
+                break
+
+            yield job
+
+            if job.status in [
+                JobStatus.COMPLETED,
+                JobStatus.FAILED,
+                JobStatus.CANCELLED,
+            ]:
+                break
+
+            await asyncio.sleep(1.0)  # Poll every second
+
+    async def list_active_jobs(self) -> list[EvaluationJob]:
+        """List all active evaluation jobs.
+
+        Returns:
+            List[EvaluationJob]: List of active jobs
+        """
+        active_jobs = []
+        for job in self._jobs.values():
+            if job.status in [JobStatus.PENDING, JobStatus.RUNNING]:
+                active_jobs.append(job)
+        return active_jobs
+
+    async def cleanup_completed_jobs(self, max_age_seconds: int = 3600) -> int:
+        """Clean up old completed jobs.
+
+        Args:
+            max_age_seconds: Maximum age for completed jobs
+
+        Returns:
+            int: Number of jobs cleaned up
+        """
+        from datetime import datetime, timezone
+
+        current_time = datetime.now(timezone.utc)
+        cleaned_count = 0
+
+        jobs_to_remove = []
+        for job_id, job in self._jobs.items():
+            if job.status in [
+                JobStatus.COMPLETED,
+                JobStatus.FAILED,
+                JobStatus.CANCELLED,
+            ]:
+                if job.completed_at:
+                    age_seconds = (current_time - job.completed_at).total_seconds()
+                    if age_seconds > max_age_seconds:
+                        jobs_to_remove.append(job_id)
+                        cleaned_count += 1
+
+        for job_id in jobs_to_remove:
+            del self._jobs[job_id]
+
+        return cleaned_count
+
+    def get_adapter_metadata(self) -> AdapterMetadata:
+        """Get metadata about this adapter.
+
+        Subclasses should override this to provide specific metadata.
+
+        Returns:
+            AdapterMetadata: Adapter metadata
+        """
+        return AdapterMetadata(
+            name=self.config.adapter_name,
+            description=f"Framework adapter for {self.config.framework_id}",
+            version=self.config.version,
+            framework_name=self.config.framework_id,
+            framework_version="unknown",
+            author=None,
+            framework_url=None,
+            supports_batch_evaluation=True,
+            supports_few_shot=True,
+            supports_custom_datasets=False,
+            min_memory_gb=None,
+            requires_gpu=False,
+            max_batch_size=None,
+            contact_email=None,
+            documentation_url=None,
+            repository_url=None,
+            license=None,
+        )
+
+    async def job_files(self, job_id: str) -> EvaluationJobFilesLocation:
+        """Default implementation that returns an empty files location.
+
+        Override this to specify the files location when opting in to OCI persistence.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationJobFilesLocation: Files location (path=None by default)
+        """
+        return EvaluationJobFilesLocation(job_id=job_id, path=None)
+
+    async def persist_job_files_oci(
+        self, job_id: str, coordinate: OCICoordinate
+    ) -> PersistResponse | None:
+        """Persist evaluation job files as an OCI artifact (manual trigger).
+
+        Args:
+            job_id: The job identifier
+            coordinate: OCI coordinates (reference and optional subject)
+
+        Returns:
+            PersistResponse: Persistence status and artifact digest
+            None: If no files to persist
+
+        Raises:
+            ValueError: If job not found, OCI reference invalid, or job not in completed state
+            RuntimeError: If persistence fails
+        """
+        # Validate job exists
+        job = self._jobs.get(job_id)
+        if not job:
+            raise ValueError(f"Job {job_id} not found")
+
+        # Validate job is completed
+        if job.status != JobStatus.COMPLETED:
+            raise ValueError(
+                f"Job {job_id} is not completed (status: {job.status}). "
+                "Only completed jobs can be persisted."
+            )
+
+        # Get files location
+        files_location = await self.job_files(job_id)
+
+        if not files_location.path:
+            logger.info(f"No files to persist for job {job_id}")
+            return None
+
+        # Create OCI artifact
+        from evalhub.adapter.oci.persister import OCIArtifactPersister
+
+        persister = OCIArtifactPersister()
+
+        return await persister.persist(
+            files_location=files_location,
+            coordinate=coordinate,
+            job=job,
+        )
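
For orientation, the listing below sketches the smallest shape a concrete adapter can take against this interface. It is illustrative only and not part of the package: EchoAdapter is a made-up name, the absolute import paths are inferred from the relative imports in this diff, and most bodies are left as ellipses because the return models (FrameworkInfo, BenchmarkInfo, EvaluationJob, ...) live in evalhub.models.api, which this diff does not show.

    # Illustrative sketch, not part of eval-hub-sdk. Module paths are inferred
    # from the relative imports shown above.
    from evalhub.adapter.models.framework import AdapterConfig, FrameworkAdapter
    from evalhub.models.api import (
        BenchmarkInfo,
        EvaluationJob,
        EvaluationRequest,
        EvaluationResponse,
        FrameworkInfo,
        HealthResponse,
    )


    class EchoAdapter(FrameworkAdapter):
        """Hypothetical minimal adapter; every @abstractmethod must be overridden."""

        async def initialize(self) -> None:
            # Load the wrapped framework, verify dependencies, allocate resources.
            ...

        async def get_framework_info(self) -> FrameworkInfo:
            ...

        async def list_benchmarks(self) -> list[BenchmarkInfo]:
            ...

        async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo | None:
            ...

        async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
            # Build the EvaluationJob and register it in self._jobs so the inherited
            # helpers (list_active_jobs, cleanup_completed_jobs, persist_job_files_oci)
            # can see it.
            ...

        async def get_job_status(self, job_id: str) -> EvaluationJob | None:
            return self._jobs.get(job_id)

        async def get_evaluation_results(self, job_id: str) -> EvaluationResponse | None:
            ...

        async def cancel_job(self, job_id: str) -> bool:
            ...

        async def health_check(self) -> HealthResponse:
            ...

        async def shutdown(self) -> None:
            # Stops the default stream_job_updates polling loop.
            self._shutdown_event.set()

Keeping submitted jobs in self._jobs is what lets the inherited helpers work without further overrides, and setting _shutdown_event in shutdown ends the default polling loop in stream_job_updates.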
@@ -0,0 +1,5 @@
+"""OCI artifact persistence for evaluation job files."""
+
+from .persister import OCIArtifactPersister
+
+__all__ = ["OCIArtifactPersister"]
@@ -0,0 +1,76 @@
+"""OCI artifact persistence for evaluation job files (placeholder implementation)."""
+
+import logging
+from pathlib import Path
+
+from evalhub.models.api import (
+    EvaluationJob,
+    EvaluationJobFilesLocation,
+    OCICoordinate,
+    PersistResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class OCIArtifactPersister:
+    """Handles OCI artifact creation (no-op placeholder for now).
+
+    Future implementation will integrate dependencies as needed for actual OCI artifact pushing.
+    """
+
+    async def persist(
+        self,
+        files_location: EvaluationJobFilesLocation,
+        coordinate: OCICoordinate,
+        job: EvaluationJob,
+    ) -> PersistResponse:
+        """Create and push OCI artifact with job files (no-op placeholder).
+
+        Currently returns a mock PersistResponse without actually persisting.
+        Future implementation will:
+        1. Validate source paths exist
+        2. Create temporary tarball with files
+        3. Generate OCI manifest (with subject if provided)
+        4. Push artifact using integrated dependencies
+        5. Return persistence response with digest
+
+        Args:
+            files_location: Files to persist
+            coordinate: OCI coordinates (reference and optional subject)
+            job: The evaluation job
+
+        Returns:
+            PersistResponse: Mock response with placeholder values
+        """
+        subject_info = (
+            f" with subject '{coordinate.oci_subject}'"
+            if coordinate.oci_subject
+            else ""
+        )
+        logger.warning(
+            f"OCI persister is a no-op placeholder. "
+            f"Would persist files from {files_location.path} to {coordinate.oci_ref}{subject_info}"
+        )
+
+        # Calculate number of files
+        files_count = 0
+        if files_location.path is not None:
+            source = Path(files_location.path)
+            if source.exists():
+                if source.is_file():
+                    files_count = 1
+                elif source.is_dir():
+                    files_count = sum(1 for f in source.rglob("*") if f.is_file())
+
+        # Return mock response
+        return PersistResponse(
+            job_id=job.job_id,
+            oci_ref=f"{coordinate.oci_ref}@sha256:{'0' * 64}",  # Placeholder digest
+            digest=f"sha256:{'0' * 64}",
+            files_count=files_count,
+            metadata={
+                "placeholder": True,
+                "message": "OCI persistence not yet implemented",
+            },
+        )
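
The numbered plan in the persist docstring maps fairly directly onto the standard library. The helper below is a rough sketch of steps 1, 2 and 5 only (validate, tarball, digest); build_layer is a hypothetical name and nothing here ships in the package. Step 3 (manifest/subject handling) and step 4 (the actual push) need a registry client, which the placeholder deliberately leaves open.

    # Hypothetical sketch of steps 1, 2 and 5 from the docstring above;
    # not part of eval-hub-sdk.
    import hashlib
    import tarfile
    import tempfile
    from pathlib import Path


    def build_layer(source: Path) -> tuple[Path, str, int]:
        """Tar up `source` and return (tarball path, sha256 digest, file count)."""
        if not source.exists():  # step 1: validate source paths exist
            raise ValueError(f"Source path {source} does not exist")

        files = [source] if source.is_file() else [f for f in source.rglob("*") if f.is_file()]

        tarball = Path(tempfile.mkdtemp()) / "job-files.tar.gz"
        with tarfile.open(tarball, "w:gz") as tar:  # step 2: temporary tarball
            for f in files:
                tar.add(f, arcname=str(f.relative_to(source.parent)))

        digest = "sha256:" + hashlib.sha256(tarball.read_bytes()).hexdigest()  # step 5
        return tarball, digest, len(files)

A real implementation would carry a digest like this in PersistResponse instead of the all-zero placeholder.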
@@ -0,0 +1,5 @@
+"""Server components for running framework adapters."""
+
+from .app import AdapterServer, create_adapter_app, run_adapter_server
+
+__all__ = ["AdapterServer", "create_adapter_app", "run_adapter_server"]
@@ -0,0 +1,157 @@
+"""Server application for running framework adapters."""
+
+import logging
+import signal
+import sys
+from typing import Any
+
+# typing imports removed - using PEP 604 union syntax
+import uvicorn
+from fastapi import FastAPI
+
+from ...utils import setup_logging
+from ..api.router import AdapterAPIRouter
+from ..models.framework import AdapterConfig, FrameworkAdapter
+
+logger = logging.getLogger(__name__)
+
+
+class AdapterServer:
+    """Server for running framework adapters with the standard SDK API."""
+
+    def __init__(self, adapter: FrameworkAdapter):
+        """Initialize the server with a framework adapter.
+
+        Args:
+            adapter: The framework adapter to run
+        """
+        self.adapter = adapter
+        self.router = AdapterAPIRouter(adapter)
+        self.app = self.router.get_app()
+        setup_logging(level=self.adapter.config.log_level, stream=sys.stdout)
+
+    def run(
+        self,
+        host: str | None = None,
+        port: int | None = None,
+        workers: int | None = None,
+        reload: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Run the adapter server.
+
+        Args:
+            host: Host to bind to (overrides config)
+            port: Port to bind to (overrides config)
+            workers: Number of workers (overrides config)
+            reload: Enable auto-reload for development
+            **kwargs: Additional arguments passed to uvicorn.run
+        """
+        config = self.adapter.config
+
+        # Use provided values or fall back to config
+        run_host = host or config.host
+        run_port = port or config.port
+        run_workers = workers or config.workers
+
+        logger.info(
+            f"Starting {config.adapter_name} server on {run_host}:{run_port} "
+            f"with {run_workers} worker(s)"
+        )
+
+        # Set up signal handlers for graceful shutdown
+        self._setup_signal_handlers()
+
+        try:
+            uvicorn.run(
+                self.app,
+                host=run_host,
+                port=run_port,
+                workers=run_workers if not reload else 1,  # Single worker for reload
+                reload=reload,
+                log_level=config.log_level.lower(),
+                access_log=True,
+                **kwargs,
+            )
+        except KeyboardInterrupt:
+            logger.info("Server stopped by user")
+        except Exception as e:
+            logger.exception(f"Server error: {e}")
+            sys.exit(1)
+
+    def _setup_signal_handlers(self) -> None:
+        """Set up signal handlers for graceful shutdown."""
+
+        def signal_handler(signum: int, frame: Any) -> None:
+            logger.info(f"Received signal {signum}, initiating shutdown...")
+            # The adapter shutdown will be handled by FastAPI's shutdown event
+            sys.exit(0)
+
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+
+    async def run_async(
+        self, host: str | None = None, port: int | None = None, **kwargs: Any
+    ) -> None:
+        """Run the server asynchronously.
+
+        Useful for embedding the server in other applications.
+
+        Args:
+            host: Host to bind to
+            port: Port to bind to
+            **kwargs: Additional uvicorn config options
+        """
+        config = self.adapter.config
+
+        uvicorn_config = uvicorn.Config(
+            self.app,
+            host=host or config.host,
+            port=port or config.port,
+            log_level=config.log_level.lower(),
+            **kwargs,
+        )
+
+        server = uvicorn.Server(uvicorn_config)
+
+        try:
+            await server.serve()
+        except KeyboardInterrupt:
+            logger.info("Server stopped")
+        except Exception as e:
+            logger.exception(f"Server error: {e}")
+            raise
+
+
+def create_adapter_app(adapter: FrameworkAdapter) -> FastAPI:
+    """Create a FastAPI application for the given adapter.
+
+    This function creates a FastAPI app configured with the adapter's API router.
+    Useful for testing and embedding the adapter in other applications.
+
+    Args:
+        adapter: The framework adapter instance
+
+    Returns:
+        FastAPI application instance
+    """
+    router = AdapterAPIRouter(adapter)
+    return router.get_app()
+
+
+def run_adapter_server(
+    adapter_class: type[FrameworkAdapter], config: AdapterConfig, **server_kwargs: Any
+) -> None:
+    """Convenience function to create and run an adapter server.
+
+    Args:
+        adapter_class: The FrameworkAdapter class to instantiate
+        config: Configuration for the adapter
+        **server_kwargs: Additional arguments for the server
+    """
+    # Create adapter instance
+    adapter = adapter_class(config)
+
+    # Create and run server
+    server = AdapterServer(adapter)
+    server.run(**server_kwargs)
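
Putting the pieces together, a launch script for an adapter looks roughly like the sketch below. EchoAdapter stands in for any concrete FrameworkAdapter subclass (the hypothetical one sketched after the first hunk), my_adapter is a made-up module name, and the port and job limit are arbitrary; only AdapterConfig fields defined above are used. create_adapter_app is wrapped in FastAPI's TestClient because the docstring names testing as its intended use; the actual routes come from AdapterAPIRouter, which is not part of this diff.

    # Hypothetical launch/test script, not part of eval-hub-sdk.
    from fastapi.testclient import TestClient

    from evalhub.adapter.models.framework import AdapterConfig
    from evalhub.adapter.server import create_adapter_app, run_adapter_server
    from my_adapter import EchoAdapter  # the illustrative subclass sketched earlier

    config = AdapterConfig(
        framework_id="echo",
        adapter_name="Echo Adapter",
        port=8080,
        max_concurrent_jobs=2,
    )

    # In-process testing: wrap the adapter's FastAPI app in a test client.
    client = TestClient(create_adapter_app(EchoAdapter(config)))

    # Production-style entry point: instantiate the adapter and serve it.
    if __name__ == "__main__":
        run_adapter_server(EchoAdapter, config)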