eval-hub-sdk 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
+"""Framework adapter models and base classes."""
+
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from ...models.api import (
+    BenchmarkInfo,
+    EvaluationJob,
+    EvaluationJobFilesLocation,
+    EvaluationRequest,
+    EvaluationResponse,
+    FrameworkInfo,
+    HealthResponse,
+    JobStatus,
+    OCICoordinate,
+    PersistResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class AdapterConfig(BaseModel):
+    """Base configuration for framework adapters."""
+
+    # Adapter identification
+    framework_id: str = Field(..., description="Unique framework identifier")
+    adapter_name: str = Field(..., description="Adapter display name")
+    version: str = Field(default="1.0.0", description="Adapter version")
+
+    # Server configuration
+    host: str = Field(default="0.0.0.0", description="Server host to bind to")
+    port: int = Field(default=8000, description="Server port")
+    workers: int = Field(default=1, description="Number of worker processes")
+
+    # Framework-specific settings
+    framework_config: dict[str, Any] = Field(
+        default_factory=dict, description="Framework-specific configuration"
+    )
+
+    # Resource limits
+    max_concurrent_jobs: int = Field(
+        default=10, description="Maximum concurrent evaluation jobs"
+    )
+    job_timeout_seconds: int = Field(
+        default=3600, description="Maximum job execution time"
+    )
+    memory_limit_gb: float | None = Field(
+        default=None, description="Memory limit in GB"
+    )
+
+    # Logging and monitoring
+    log_level: str = Field(default="INFO", description="Logging level")
+    enable_metrics: bool = Field(default=True, description="Enable metrics collection")
+    health_check_interval: int = Field(
+        default=30, description="Health check interval in seconds"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        extra = "allow"
+
+
+class AdapterMetadata(BaseModel):
+    """Metadata for framework adapters."""
+
+    # Basic information
+    name: str = Field(..., description="Adapter name")
+    description: str | None = Field(None, description="Adapter description")
+    version: str = Field(..., description="Adapter version")
+    author: str | None = Field(None, description="Adapter author")
+
+    # Framework information
+    framework_name: str = Field(..., description="Name of the wrapped framework")
+    framework_version: str = Field(..., description="Version of the wrapped framework")
+    framework_url: str | None = Field(
+        None, description="Framework documentation/repository URL"
+    )
+
+    # Capabilities
+    supported_model_types: list[str] = Field(
+        default_factory=list, description="Supported model types"
+    )
+    supported_metrics: list[str] = Field(
+        default_factory=list, description="Supported evaluation metrics"
+    )
+    supports_batch_evaluation: bool = Field(
+        True, description="Supports batch evaluation"
+    )
+    supports_few_shot: bool = Field(True, description="Supports few-shot evaluation")
+    supports_custom_datasets: bool = Field(
+        False, description="Supports custom datasets"
+    )
+
+    # Resource requirements
+    min_memory_gb: float | None = Field(None, description="Minimum memory requirement")
+    requires_gpu: bool = Field(False, description="Requires GPU")
+    max_batch_size: int | None = Field(None, description="Maximum batch size")
+
+    # Contact and documentation
+    contact_email: str | None = Field(None, description="Contact email")
+    documentation_url: str | None = Field(None, description="Documentation URL")
+    repository_url: str | None = Field(None, description="Source repository URL")
+    license: str | None = Field(None, description="License information")
+
+
+class FrameworkAdapter(ABC):
+    """Abstract base class for framework adapters.
+
+    This class defines the interface that all framework adapters must implement
+    to integrate with EvalHub via the SDK.
+    """
+
+    def __init__(self, config: AdapterConfig):
+        """Initialize the adapter with configuration."""
+        self.config = config
+        self._jobs: dict[str, EvaluationJob] = {}
+        self._shutdown_event = asyncio.Event()
+
+    @abstractmethod
+    async def initialize(self) -> None:
+        """Initialize the framework adapter.
+
+        This method should:
+        - Load the underlying evaluation framework
+        - Verify dependencies are available
+        - Set up any required resources
+        - Prepare for evaluation requests
+
+        Raises:
+            Exception: If initialization fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_framework_info(self) -> FrameworkInfo:
+        """Get information about this framework adapter.
+
+        Returns:
+            FrameworkInfo: Metadata about the framework and its capabilities
+        """
+        pass
+
+    @abstractmethod
+    async def list_benchmarks(self) -> list[BenchmarkInfo]:
+        """List all available benchmarks.
+
+        Returns:
+            List[BenchmarkInfo]: Available benchmarks with their metadata
+        """
+        pass
+
+    @abstractmethod
+    async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo | None:
+        """Get detailed information about a specific benchmark.
+
+        Args:
+            benchmark_id: The benchmark identifier
+
+        Returns:
+            BenchmarkInfo: Benchmark information, or None if not found
+        """
+        pass
+
+    @abstractmethod
+    async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
+        """Submit an evaluation job.
+
+        Args:
+            request: The evaluation request
+
+        Returns:
+            EvaluationJob: The created job with initial status
+
+        Raises:
+            ValueError: If request is invalid
+            RuntimeError: If unable to submit job
+        """
+        pass
+
+    @abstractmethod
+    async def get_job_status(self, job_id: str) -> EvaluationJob | None:
+        """Get the current status of an evaluation job.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationJob: Current job status, or None if not found
+        """
+        pass
+
+    @abstractmethod
+    async def get_evaluation_results(self, job_id: str) -> EvaluationResponse | None:
+        """Get the results of a completed evaluation.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationResponse: Evaluation results, or None if not available
+        """
+        pass
+
+    @abstractmethod
+    async def cancel_job(self, job_id: str) -> bool:
+        """Cancel a running evaluation job.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            bool: True if job was cancelled, False if not found or already completed
+        """
+        pass
+
+    @abstractmethod
+    async def health_check(self) -> HealthResponse:
+        """Perform a health check of the framework adapter.
+
+        Returns:
+            HealthResponse: Current health status and resource information
+        """
+        pass
+
+    @abstractmethod
+    async def shutdown(self) -> None:
+        """Gracefully shut down the framework adapter.
+
+        This method should:
+        - Cancel any running jobs
+        - Clean up resources
+        - Save any necessary state
+        """
+        pass
+
+    # Optional methods with default implementations
+
+    async def stream_job_updates(
+        self, job_id: str
+    ) -> AsyncGenerator[EvaluationJob, None]:
+        """Stream real-time updates for a job.
+
+        The default implementation polls get_job_status. Framework adapters
+        can override this to provide true streaming updates.
+
+        Args:
+            job_id: The job identifier
+
+        Yields:
+            EvaluationJob: Updated job status
+        """
+        while not self._shutdown_event.is_set():
+            job = await self.get_job_status(job_id)
+            if not job:
+                break
+
+            yield job
+
+            if job.status in [
+                JobStatus.COMPLETED,
+                JobStatus.FAILED,
+                JobStatus.CANCELLED,
+            ]:
+                break
+
+            await asyncio.sleep(1.0)  # Poll every second
+
+    async def list_active_jobs(self) -> list[EvaluationJob]:
+        """List all active evaluation jobs.
+
+        Returns:
+            List[EvaluationJob]: List of active jobs
+        """
+        active_jobs = []
+        for job in self._jobs.values():
+            if job.status in [JobStatus.PENDING, JobStatus.RUNNING]:
+                active_jobs.append(job)
+        return active_jobs
+
+    async def cleanup_completed_jobs(self, max_age_seconds: int = 3600) -> int:
+        """Clean up old completed jobs.
+
+        Args:
+            max_age_seconds: Maximum age for completed jobs
+
+        Returns:
+            int: Number of jobs cleaned up
+        """
+        from datetime import datetime, timezone
+
+        current_time = datetime.now(timezone.utc)
+        cleaned_count = 0
+
+        jobs_to_remove = []
+        for job_id, job in self._jobs.items():
+            if job.status in [
+                JobStatus.COMPLETED,
+                JobStatus.FAILED,
+                JobStatus.CANCELLED,
+            ]:
+                if job.completed_at:
+                    age_seconds = (current_time - job.completed_at).total_seconds()
+                    if age_seconds > max_age_seconds:
+                        jobs_to_remove.append(job_id)
+                        cleaned_count += 1
+
+        for job_id in jobs_to_remove:
+            del self._jobs[job_id]
+
+        return cleaned_count
+
+    def get_adapter_metadata(self) -> AdapterMetadata:
+        """Get metadata about this adapter.
+
+        Subclasses should override this to provide specific metadata.
+
+        Returns:
+            AdapterMetadata: Adapter metadata
+        """
+        return AdapterMetadata(
+            name=self.config.adapter_name,
+            description=f"Framework adapter for {self.config.framework_id}",
+            version=self.config.version,
+            framework_name=self.config.framework_id,
+            framework_version="unknown",
+            author=None,
+            framework_url=None,
+            supports_batch_evaluation=True,
+            supports_few_shot=True,
+            supports_custom_datasets=False,
+            min_memory_gb=None,
+            requires_gpu=False,
+            max_batch_size=None,
+            contact_email=None,
+            documentation_url=None,
+            repository_url=None,
+            license=None,
+        )
+
+    async def job_files(self, job_id: str) -> EvaluationJobFilesLocation:
+        """Default implementation that returns an empty files location.
+
+        Override this to specify the files location when opting in to OCI persistence.
+
+        Args:
+            job_id: The job identifier
+
+        Returns:
+            EvaluationJobFilesLocation: Files location (path=None by default)
+        """
+        return EvaluationJobFilesLocation(job_id=job_id, path=None)
+
+    async def persist_job_files_oci(
+        self, job_id: str, coordinate: OCICoordinate
+    ) -> PersistResponse | None:
+        """Persist evaluation job files as an OCI artifact (manual trigger).
+
+        Args:
+            job_id: The job identifier
+            coordinate: OCI coordinates (reference and optional subject)
+
+        Returns:
+            PersistResponse: Persistence status and artifact digest
+            None: If no files to persist
+
+        Raises:
+            ValueError: If job not found, OCI reference invalid, or job not in completed state
+            RuntimeError: If persistence fails
+        """
+        # Validate job exists
+        job = self._jobs.get(job_id)
+        if not job:
+            raise ValueError(f"Job {job_id} not found")
+
+        # Validate job is completed
+        if job.status != JobStatus.COMPLETED:
+            raise ValueError(
+                f"Job {job_id} is not completed (status: {job.status}). "
+                "Only completed jobs can be persisted."
+            )
+
+        # Get files location
+        files_location = await self.job_files(job_id)
+
+        if not files_location.path:
+            logger.info(f"No files to persist for job {job_id}")
+            return None
+
+        # Create OCI artifact
+        from evalhub.adapter.oci.persister import OCIArtifactPersister
+
+        persister = OCIArtifactPersister()
+
+        return await persister.persist(
+            files_location=files_location,
+            coordinate=coordinate,
+            job=job,
+        )
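
For orientation, the listing below sketches the smallest shape a concrete adapter can take against this interface. It is illustrative only and not part of the package: EchoAdapter is a made-up name, the absolute import paths are inferred from the relative imports in this diff, and most bodies are left as ellipses because the return models (FrameworkInfo, BenchmarkInfo, EvaluationJob, ...) live in evalhub.models.api, which this diff does not show.

    # Illustrative sketch, not part of eval-hub-sdk. Module paths are inferred
    # from the relative imports shown above.
    from evalhub.adapter.models.framework import AdapterConfig, FrameworkAdapter
    from evalhub.models.api import (
        BenchmarkInfo,
        EvaluationJob,
        EvaluationRequest,
        EvaluationResponse,
        FrameworkInfo,
        HealthResponse,
    )


    class EchoAdapter(FrameworkAdapter):
        """Hypothetical minimal adapter; every @abstractmethod must be overridden."""

        async def initialize(self) -> None:
            # Load the wrapped framework, verify dependencies, allocate resources.
            ...

        async def get_framework_info(self) -> FrameworkInfo:
            ...

        async def list_benchmarks(self) -> list[BenchmarkInfo]:
            ...

        async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo | None:
            ...

        async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
            # Build the EvaluationJob and register it in self._jobs so the inherited
            # helpers (list_active_jobs, cleanup_completed_jobs, persist_job_files_oci)
            # can see it.
            ...

        async def get_job_status(self, job_id: str) -> EvaluationJob | None:
            return self._jobs.get(job_id)

        async def get_evaluation_results(self, job_id: str) -> EvaluationResponse | None:
            ...

        async def cancel_job(self, job_id: str) -> bool:
            ...

        async def health_check(self) -> HealthResponse:
            ...

        async def shutdown(self) -> None:
            # Stops the default stream_job_updates polling loop.
            self._shutdown_event.set()

Keeping submitted jobs in self._jobs is what lets the inherited helpers work without further overrides, and setting _shutdown_event in shutdown ends the default polling loop in stream_job_updates.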
@@ -0,0 +1,5 @@
+"""OCI artifact persistence for evaluation job files."""
+
+from .persister import OCIArtifactPersister
+
+__all__ = ["OCIArtifactPersister"]
@@ -0,0 +1,76 @@
+"""OCI artifact persistence for evaluation job files (placeholder implementation)."""
+
+import logging
+from pathlib import Path
+
+from evalhub.models.api import (
+    EvaluationJob,
+    EvaluationJobFilesLocation,
+    OCICoordinate,
+    PersistResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class OCIArtifactPersister:
+    """Handles OCI artifact creation (no-op placeholder for now).
+
+    Future implementation will integrate dependencies as needed for actual OCI artifact pushing.
+    """
+
+    async def persist(
+        self,
+        files_location: EvaluationJobFilesLocation,
+        coordinate: OCICoordinate,
+        job: EvaluationJob,
+    ) -> PersistResponse:
+        """Create and push OCI artifact with job files (no-op placeholder).
+
+        Currently returns a mock PersistResponse without actually persisting.
+        Future implementation will:
+        1. Validate source paths exist
+        2. Create temporary tarball with files
+        3. Generate OCI manifest (with subject if provided)
+        4. Push artifact using integrated dependencies
+        5. Return persistence response with digest
+
+        Args:
+            files_location: Files to persist
+            coordinate: OCI coordinates (reference and optional subject)
+            job: The evaluation job
+
+        Returns:
+            PersistResponse: Mock response with placeholder values
+        """
+        subject_info = (
+            f" with subject '{coordinate.oci_subject}'"
+            if coordinate.oci_subject
+            else ""
+        )
+        logger.warning(
+            f"OCI persister is a no-op placeholder. "
+            f"Would persist files from {files_location.path} to {coordinate.oci_ref}{subject_info}"
+        )
+
+        # Calculate number of files
+        files_count = 0
+        if files_location.path is not None:
+            source = Path(files_location.path)
+            if source.exists():
+                if source.is_file():
+                    files_count = 1
+                elif source.is_dir():
+                    files_count = sum(1 for f in source.rglob("*") if f.is_file())
+
+        # Return mock response
+        return PersistResponse(
+            job_id=job.job_id,
+            oci_ref=f"{coordinate.oci_ref}@sha256:{'0' * 64}",  # Placeholder digest
+            digest=f"sha256:{'0' * 64}",
+            files_count=files_count,
+            metadata={
+                "placeholder": True,
+                "message": "OCI persistence not yet implemented",
+            },
+        )
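
The numbered plan in the persist docstring maps fairly directly onto the standard library. The helper below is a rough sketch of steps 1, 2 and 5 only (validate, tarball, digest); build_layer is a hypothetical name and nothing here ships in the package. Step 3 (manifest/subject handling) and step 4 (the actual push) need a registry client, which the placeholder deliberately leaves open.

    # Hypothetical sketch of steps 1, 2 and 5 from the docstring above;
    # not part of eval-hub-sdk.
    import hashlib
    import tarfile
    import tempfile
    from pathlib import Path


    def build_layer(source: Path) -> tuple[Path, str, int]:
        """Tar up `source` and return (tarball path, sha256 digest, file count)."""
        if not source.exists():  # step 1: validate source paths exist
            raise ValueError(f"Source path {source} does not exist")

        files = [source] if source.is_file() else [f for f in source.rglob("*") if f.is_file()]

        tarball = Path(tempfile.mkdtemp()) / "job-files.tar.gz"
        with tarfile.open(tarball, "w:gz") as tar:  # step 2: temporary tarball
            for f in files:
                tar.add(f, arcname=str(f.relative_to(source.parent)))

        digest = "sha256:" + hashlib.sha256(tarball.read_bytes()).hexdigest()  # step 5
        return tarball, digest, len(files)

A real implementation would carry a digest like this in PersistResponse instead of the all-zero placeholder.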
@@ -0,0 +1,5 @@
+"""Server components for running framework adapters."""
+
+from .app import AdapterServer, create_adapter_app, run_adapter_server
+
+__all__ = ["AdapterServer", "create_adapter_app", "run_adapter_server"]
@@ -0,0 +1,157 @@
+"""Server application for running framework adapters."""
+
+import logging
+import signal
+import sys
+from typing import Any
+
+# typing imports removed - using PEP 604 union syntax
+import uvicorn
+from fastapi import FastAPI
+
+from ...utils import setup_logging
+from ..api.router import AdapterAPIRouter
+from ..models.framework import AdapterConfig, FrameworkAdapter
+
+logger = logging.getLogger(__name__)
+
+
+class AdapterServer:
+    """Server for running framework adapters with the standard SDK API."""
+
+    def __init__(self, adapter: FrameworkAdapter):
+        """Initialize the server with a framework adapter.
+
+        Args:
+            adapter: The framework adapter to run
+        """
+        self.adapter = adapter
+        self.router = AdapterAPIRouter(adapter)
+        self.app = self.router.get_app()
+        setup_logging(level=self.adapter.config.log_level, stream=sys.stdout)
+
+    def run(
+        self,
+        host: str | None = None,
+        port: int | None = None,
+        workers: int | None = None,
+        reload: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Run the adapter server.
+
+        Args:
+            host: Host to bind to (overrides config)
+            port: Port to bind to (overrides config)
+            workers: Number of workers (overrides config)
+            reload: Enable auto-reload for development
+            **kwargs: Additional arguments passed to uvicorn.run
+        """
+        config = self.adapter.config
+
+        # Use provided values or fall back to config
+        run_host = host or config.host
+        run_port = port or config.port
+        run_workers = workers or config.workers
+
+        logger.info(
+            f"Starting {config.adapter_name} server on {run_host}:{run_port} "
+            f"with {run_workers} worker(s)"
+        )
+
+        # Set up signal handlers for graceful shutdown
+        self._setup_signal_handlers()
+
+        try:
+            uvicorn.run(
+                self.app,
+                host=run_host,
+                port=run_port,
+                workers=run_workers if not reload else 1,  # Single worker for reload
+                reload=reload,
+                log_level=config.log_level.lower(),
+                access_log=True,
+                **kwargs,
+            )
+        except KeyboardInterrupt:
+            logger.info("Server stopped by user")
+        except Exception as e:
+            logger.exception(f"Server error: {e}")
+            sys.exit(1)
+
+    def _setup_signal_handlers(self) -> None:
+        """Set up signal handlers for graceful shutdown."""
+
+        def signal_handler(signum: int, frame: Any) -> None:
+            logger.info(f"Received signal {signum}, initiating shutdown...")
+            # The adapter shutdown will be handled by FastAPI's shutdown event
+            sys.exit(0)
+
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+
+    async def run_async(
+        self, host: str | None = None, port: int | None = None, **kwargs: Any
+    ) -> None:
+        """Run the server asynchronously.
+
+        Useful for embedding the server in other applications.
+
+        Args:
+            host: Host to bind to
+            port: Port to bind to
+            **kwargs: Additional uvicorn config options
+        """
+        config = self.adapter.config
+
+        uvicorn_config = uvicorn.Config(
+            self.app,
+            host=host or config.host,
+            port=port or config.port,
+            log_level=config.log_level.lower(),
+            **kwargs,
+        )
+
+        server = uvicorn.Server(uvicorn_config)
+
+        try:
+            await server.serve()
+        except KeyboardInterrupt:
+            logger.info("Server stopped")
+        except Exception as e:
+            logger.exception(f"Server error: {e}")
+            raise
+
+
+def create_adapter_app(adapter: FrameworkAdapter) -> FastAPI:
+    """Create a FastAPI application for the given adapter.
+
+    This function creates a FastAPI app configured with the adapter's API router.
+    Useful for testing and embedding the adapter in other applications.
+
+    Args:
+        adapter: The framework adapter instance
+
+    Returns:
+        FastAPI application instance
+    """
+    router = AdapterAPIRouter(adapter)
+    return router.get_app()
+
+
+def run_adapter_server(
+    adapter_class: type[FrameworkAdapter], config: AdapterConfig, **server_kwargs: Any
+) -> None:
+    """Convenience function to create and run an adapter server.
+
+    Args:
+        adapter_class: The FrameworkAdapter class to instantiate
+        config: Configuration for the adapter
+        **server_kwargs: Additional arguments for the server
+    """
+    # Create adapter instance
+    adapter = adapter_class(config)
+
+    # Create and run server
+    server = AdapterServer(adapter)
+    server.run(**server_kwargs)
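
Putting the pieces together, a launch script for an adapter looks roughly like the sketch below. EchoAdapter stands in for any concrete FrameworkAdapter subclass (the hypothetical one sketched after the first hunk), my_adapter is a made-up module name, and the port and job limit are arbitrary; only AdapterConfig fields defined above are used. create_adapter_app is wrapped in FastAPI's TestClient because the docstring names testing as its intended use; the actual routes come from AdapterAPIRouter, which is not part of this diff.

    # Hypothetical launch/test script, not part of eval-hub-sdk.
    from fastapi.testclient import TestClient

    from evalhub.adapter.models.framework import AdapterConfig
    from evalhub.adapter.server import create_adapter_app, run_adapter_server
    from my_adapter import EchoAdapter  # the illustrative subclass sketched earlier

    config = AdapterConfig(
        framework_id="echo",
        adapter_name="Echo Adapter",
        port=8080,
        max_concurrent_jobs=2,
    )

    # In-process testing: wrap the adapter's FastAPI app in a test client.
    client = TestClient(create_adapter_app(EchoAdapter(config)))

    # Production-style entry point: instantiate the adapter and serve it.
    if __name__ == "__main__":
        run_adapter_server(EchoAdapter, config)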