eval-hub-sdk 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,418 @@
1
+ """Client for communicating with framework adapters via the standard SDK API."""
2
+
3
+ import logging
4
+ from collections.abc import AsyncGenerator
5
+ from typing import Any
6
+
7
+ # Optional/Union are not imported from typing - PEP 604 union syntax (X | None) is used instead
8
+ import httpx
9
+
10
+ from ...models.api import (
11
+ BenchmarkInfo,
12
+ EvaluationJob,
13
+ EvaluationRequest,
14
+ EvaluationResponse,
15
+ FrameworkInfo,
16
+ HealthResponse,
17
+ JobStatus,
18
+ OCICoordinate,
19
+ PersistResponse,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class ClientError(Exception):
26
+ """Base exception for client errors."""
27
+
28
+ def __init__(self, message: str, cause: Exception | None = None) -> None:
29
+ super().__init__(message)
30
+ self.cause = cause
31
+
32
+
33
class AdapterClient:
    """Client for communicating with framework adapters.

    This client provides a standardized way for EvalHub to communicate
    with any framework adapter that implements the SDK API. Every endpoint
    path is resolved relative to ``<base_url>/api/v1``.

    The instance owns an ``httpx.AsyncClient``. Release it by calling
    :meth:`close` or by using the client as an async context manager.
    """

    def __init__(self, base_url: str, timeout: float = 30.0, max_retries: int = 3):
        """Initialize the adapter client.

        Args:
            base_url: Base URL of the framework adapter (e.g., "http://adapter:8080").
                Trailing slashes are stripped.
            timeout: Request timeout in seconds
            max_retries: Maximum number of retry attempts. Values below zero
                are treated as zero when requests are made.
        """
        self.base_url = base_url.rstrip("/")
        self.api_base = f"{self.base_url}/api/v1"

        self._client = httpx.AsyncClient(
            timeout=httpx.Timeout(timeout),
            limits=httpx.Limits(max_connections=20, max_keepalive_connections=5),
        )

        self.max_retries = max_retries

    async def close(self) -> None:
        """Close the HTTP client."""
        await self._client.aclose()

    async def __aenter__(self) -> "AdapterClient":
        """Async context manager entry."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Async context manager exit; closes the HTTP client."""
        await self.close()

    @staticmethod
    def _is_terminal(status: JobStatus) -> bool:
        """Return True for statuses after which polling for a job stops."""
        return status in (
            JobStatus.COMPLETED,
            JobStatus.FAILED,
            JobStatus.CANCELLED,
        )

    async def _request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
        """Make HTTP request with retry logic.

        Timeouts, connection errors and 5xx responses are retried up to
        ``max_retries`` times; 4xx responses are raised immediately.
        Retries are issued back-to-back (no delay between attempts).

        Args:
            method: HTTP method
            path: API path (without base URL)
            **kwargs: Additional arguments for httpx

        Returns:
            httpx.Response: Response object

        Raises:
            httpx.HTTPError: If request fails after retries
        """
        # NOTE(review): retries apply to every method, including POST; a
        # timed-out POST that the server actually processed may be submitted
        # twice. Confirm adapters tolerate that.
        url = f"{self.api_base}{path}"

        # A negative max_retries would otherwise skip the loop entirely and
        # surface as an unrelated RuntimeError; treat it as "no retries".
        last_attempt = max(self.max_retries, 0)

        for attempt in range(last_attempt + 1):
            try:
                response = await self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response

            except httpx.TimeoutException:
                if attempt == last_attempt:
                    logger.error(
                        f"Request to {url} timed out after {last_attempt} retries"
                    )
                    raise
                logger.warning(
                    f"Request to {url} timed out, retrying ({attempt + 1}/{last_attempt})"
                )

            except httpx.HTTPStatusError as e:
                # Don't retry client errors (4xx), only server errors (5xx)
                if e.response.status_code < 500 or attempt == last_attempt:
                    raise
                logger.warning(
                    f"Server error {e.response.status_code} for {url}, retrying ({attempt + 1}/{last_attempt})"
                )

            except httpx.RequestError as e:
                if attempt == last_attempt:
                    logger.error(
                        f"Connection error to {url} after {last_attempt} retries: {e}"
                    )
                    raise
                logger.warning(
                    f"Connection error to {url}, retrying ({attempt + 1}/{last_attempt}): {e}"
                )

        # This should never be reached, but mypy needs a return
        raise RuntimeError("Request retry loop completed without returning")

    # Health and Info endpoints

    async def health_check(self) -> HealthResponse:
        """Check the health of the framework adapter.

        Returns:
            HealthResponse: Current health status

        Raises:
            httpx.HTTPError: If health check fails
        """
        response = await self._request("GET", "/health")
        return HealthResponse(**response.json())

    async def get_framework_info(self) -> FrameworkInfo:
        """Get information about the framework adapter.

        Returns:
            FrameworkInfo: Framework capabilities and metadata

        Raises:
            httpx.HTTPError: If request fails
        """
        response = await self._request("GET", "/info")
        return FrameworkInfo(**response.json())

    # Benchmark endpoints

    async def list_benchmarks(self) -> list[BenchmarkInfo]:
        """List all available benchmarks.

        Returns:
            List[BenchmarkInfo]: Available benchmarks

        Raises:
            httpx.HTTPError: If request fails
        """
        response = await self._request("GET", "/benchmarks")
        return [BenchmarkInfo(**benchmark) for benchmark in response.json()]

    async def get_benchmark_info(self, benchmark_id: str) -> BenchmarkInfo:
        """Get detailed information about a specific benchmark.

        Args:
            benchmark_id: The benchmark identifier

        Returns:
            BenchmarkInfo: Benchmark information

        Raises:
            httpx.HTTPError: If benchmark not found or request fails
        """
        response = await self._request("GET", f"/benchmarks/{benchmark_id}")
        return BenchmarkInfo(**response.json())

    # Evaluation endpoints

    async def submit_evaluation(self, request: EvaluationRequest) -> EvaluationJob:
        """Submit an evaluation job.

        Args:
            request: The evaluation request

        Returns:
            EvaluationJob: The submitted job

        Raises:
            httpx.HTTPError: If request fails or is invalid
        """
        # mode="json" makes the dump JSON-compatible (enums, datetimes, ...)
        # before httpx serialises it as the request body.
        response = await self._request(
            "POST", "/evaluations", json=request.model_dump(mode="json")
        )
        return EvaluationJob(**response.json())

    async def get_job_status(self, job_id: str) -> EvaluationJob:
        """Get the status of an evaluation job.

        Args:
            job_id: The job identifier

        Returns:
            EvaluationJob: Current job status

        Raises:
            httpx.HTTPError: If job not found or request fails
        """
        response = await self._request("GET", f"/evaluations/{job_id}")
        return EvaluationJob(**response.json())

    async def get_evaluation_results(self, job_id: str) -> EvaluationResponse:
        """Get the results of a completed evaluation.

        Args:
            job_id: The job identifier

        Returns:
            EvaluationResponse: Evaluation results

        Raises:
            httpx.HTTPError: If results not available or request fails
        """
        response = await self._request("GET", f"/evaluations/{job_id}/results")
        return EvaluationResponse(**response.json())

    async def cancel_job(self, job_id: str) -> bool:
        """Cancel an evaluation job.

        Args:
            job_id: The job identifier

        Returns:
            bool: True if job was cancelled; False when the adapter answers
                404 (job not found) or 409 (job cannot be cancelled)

        Raises:
            httpx.HTTPError: If request fails for any other reason
        """
        try:
            await self._request("DELETE", f"/evaluations/{job_id}")
        except httpx.HTTPStatusError as e:
            # 404: job not found, 409: job cannot be cancelled
            if e.response.status_code in (404, 409):
                return False
            raise
        return True

    async def list_jobs(
        self, status: JobStatus | None = None, limit: int | None = None
    ) -> list[EvaluationJob]:
        """List evaluation jobs.

        Args:
            status: Filter by job status
            limit: Maximum number of jobs to return

        Returns:
            List[EvaluationJob]: List of jobs

        Raises:
            httpx.HTTPError: If request fails
        """
        params: dict[str, str] = {}
        # Compare against None so that an explicit limit of 0 is still sent
        # to the adapter instead of being silently dropped.
        if status is not None:
            params["status"] = status.value
        if limit is not None:
            params["limit"] = str(limit)

        response = await self._request("GET", "/evaluations", params=params)
        return [EvaluationJob(**job) for job in response.json()]

    async def stream_job_updates(
        self, job_id: str
    ) -> AsyncGenerator[EvaluationJob, None]:
        """Stream real-time updates for an evaluation job.

        Reads ``data: `` prefixed lines from the adapter's stream endpoint and
        yields one job per parsable line. Lines that cannot be parsed are
        logged and skipped. On any ``httpx.HTTPError`` the method falls back
        to polling the job status.

        Args:
            job_id: The job identifier

        Yields:
            EvaluationJob: Updated job status

        Raises:
            httpx.HTTPError: If the polling fallback fails
        """
        import json  # imported once here rather than once per streamed line

        url = f"{self.api_base}/evaluations/{job_id}/stream"

        try:
            async with self._client.stream("GET", url) as response:
                response.raise_for_status()

                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    data = line[6:]  # Remove "data: " prefix
                    if not data.strip():
                        continue

                    # Only the parsing is guarded; the yield stays outside so
                    # exceptions thrown into the generator by the consumer
                    # are not mistaken for parse failures.
                    try:
                        job_update = EvaluationJob(**json.loads(data))
                    except Exception as e:
                        logger.warning(f"Failed to parse streaming data: {e}")
                        continue
                    yield job_update

        except httpx.HTTPError:
            # Fall back to polling if streaming is not supported
            logger.info(
                f"Streaming not available for {job_id}, falling back to polling"
            )
            async for job_update in self._poll_job_updates(job_id):
                yield job_update

    async def _poll_job_updates(
        self, job_id: str, interval: float = 2.0
    ) -> AsyncGenerator[EvaluationJob, None]:
        """Poll for job updates (fallback for streaming).

        Yields the job after every poll and stops once it reaches a terminal
        status or the adapter reports 404 for it.

        Args:
            job_id: The job identifier
            interval: Polling interval in seconds

        Yields:
            EvaluationJob: Updated job status
        """
        import asyncio

        while True:
            try:
                job = await self.get_job_status(job_id)
                yield job

                if self._is_terminal(job.status):
                    break

                await asyncio.sleep(interval)

            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    break  # Job not found
                raise

    async def wait_for_completion(
        self, job_id: str, timeout: float | None = None, poll_interval: float = 5.0
    ) -> EvaluationJob:
        """Wait for an evaluation job to complete.

        The job is always polled at least once before the timeout is checked.

        Args:
            job_id: The job identifier
            timeout: Maximum time to wait in seconds; ``None`` waits
                indefinitely, ``0`` gives up after the first poll
            poll_interval: Polling interval in seconds

        Returns:
            EvaluationJob: Final job status

        Raises:
            TimeoutError: If job doesn't complete within timeout
            httpx.HTTPError: If request fails
        """
        import asyncio
        import time

        # Monotonic clock: immune to wall-clock adjustments while waiting.
        deadline = None if timeout is None else time.monotonic() + timeout

        while True:
            job = await self.get_job_status(job_id)

            if self._is_terminal(job.status):
                return job

            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Job {job_id} did not complete within {timeout} seconds"
                )

            await asyncio.sleep(poll_interval)

    async def persist_job_files(
        self, job_id: str, coordinate: OCICoordinate
    ) -> PersistResponse:
        """Persist job files as OCI artifact.

        Args:
            job_id: The job identifier
            coordinate: OCI coordinates (reference and optional subject)

        Returns:
            PersistResponse: Persistence status and artifact information

        Raises:
            httpx.HTTPError: If request fails
            ClientError: If job not found or has no files to persist
        """
        try:
            response = await self._request(
                "POST",
                f"/evaluations/{job_id}/persist",
                json=coordinate.model_dump(mode="json"),
            )
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                raise ClientError(
                    f"Job {job_id} not found or has no files to persist", cause=e
                ) from e
            raise
        return PersistResponse(**response.json())
@@ -0,0 +1,275 @@
1
+ """Discovery service for finding and managing framework adapters."""
2
+
3
+ import asyncio
4
+ import logging
5
+ from dataclasses import dataclass
6
+
7
+ # typing imports removed - using PEP 604 union syntax
8
+ from ...models.api import FrameworkInfo, HealthResponse
9
+ from .adapter_client import AdapterClient
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@dataclass
class AdapterEndpoint:
    """Information about a discovered adapter endpoint.

    Instances are stored by the discovery service keyed by ``url`` and are
    updated in place when health checks run.
    """

    # Base URL of the adapter; also the key under which it is registered.
    url: str
    # Framework identifier reported by the adapter, or the caller-supplied /
    # synthetic "unknown_<n>" placeholder for manually registered adapters.
    framework_id: str
    # Framework name; "Unknown" until discovery succeeds.
    name: str
    # Framework version; "unknown" until discovery succeeds.
    version: str
    # "healthy", "unhealthy", "unreachable" - or "unknown" for an adapter
    # that was registered manually and has not been checked yet.
    status: str
    # Event-loop clock reading (loop.time()) taken at the last check, not a
    # wall-clock timestamp; None if never checked.
    last_checked: float | None = None
    # Framework metadata returned by the adapter, when discovery succeeded.
    framework_info: FrameworkInfo | None = None
    # Most recent successful health response, if any.
    health_info: HealthResponse | None = None
26
+
27
+
28
class AdapterDiscovery:
    """Service for discovering and managing framework adapter endpoints.

    This helps EvalHub automatically discover available framework adapters
    and route requests to the appropriate adapter. Adapters are tracked in a
    dictionary keyed by their base URL.
    """

    def __init__(self) -> None:
        """Initialize the discovery service."""
        self._adapters: dict[str, AdapterEndpoint] = {}
        self._check_interval = 30.0  # Health check interval in seconds
        self._running = False
        self._health_check_task: asyncio.Task | None = None

    @staticmethod
    def _now() -> float:
        """Return the running event loop's clock, used for ``last_checked``."""
        # get_running_loop() is the documented way to reach the loop from
        # inside a coroutine; get_event_loop() has policy-dependent behaviour.
        return asyncio.get_running_loop().time()

    def register_adapter(self, url: str, framework_id: str | None = None) -> None:
        """Manually register a framework adapter.

        The adapter is stored with placeholder metadata and status
        ``"unknown"``. An existing registration for the same URL is replaced.

        Args:
            url: The adapter's base URL
            framework_id: Optional framework ID; when omitted a placeholder
                ``unknown_<n>`` (n = number of adapters currently registered)
                is used
        """
        adapter = AdapterEndpoint(
            url=url,
            framework_id=framework_id or f"unknown_{len(self._adapters)}",
            name="Unknown",
            version="unknown",
            status="unknown",
        )

        self._adapters[url] = adapter
        logger.info(f"Registered adapter: {url}")

    def unregister_adapter(self, url: str) -> bool:
        """Unregister a framework adapter.

        Args:
            url: The adapter's base URL

        Returns:
            bool: True if adapter was unregistered
        """
        if url not in self._adapters:
            return False

        del self._adapters[url]
        logger.info(f"Unregistered adapter: {url}")
        return True

    async def discover_adapter(self, url: str) -> AdapterEndpoint | None:
        """Discover information about an adapter at the given URL.

        Queries the adapter's framework info and health endpoints. The result
        is returned only; it is not added to the registry.

        Args:
            url: The adapter's base URL

        Returns:
            AdapterEndpoint: Adapter information, or None if discovery failed
        """
        try:
            async with AdapterClient(url, timeout=10.0) as client:
                # Get framework info
                framework_info = await client.get_framework_info()

                # Get health status
                health_info = await client.health_check()

            adapter = AdapterEndpoint(
                url=url,
                framework_id=framework_info.framework_id,
                name=framework_info.name,
                version=framework_info.version,
                status=health_info.status,
                last_checked=self._now(),
                framework_info=framework_info,
                health_info=health_info,
            )

            logger.info(
                f"Discovered adapter: {framework_info.name} "
                f"({framework_info.framework_id}) at {url}"
            )

            return adapter

        except Exception as e:
            # Best effort by design: any failure means "not discoverable".
            logger.warning(f"Failed to discover adapter at {url}: {e}")
            return None

    async def check_adapter_health(self, adapter: AdapterEndpoint) -> AdapterEndpoint:
        """Check the health of a specific adapter.

        The adapter object is updated in place. On any failure its status is
        set to ``"unreachable"``; the previously stored ``health_info`` is
        left untouched in that case.

        Args:
            adapter: The adapter to check

        Returns:
            AdapterEndpoint: Updated adapter information (the same object)
        """
        try:
            async with AdapterClient(adapter.url, timeout=5.0) as client:
                health_info = await client.health_check()

            adapter.status = health_info.status
            adapter.health_info = health_info
            adapter.last_checked = self._now()

            logger.debug(f"Health check: {adapter.framework_id} is {adapter.status}")

        except Exception as e:
            adapter.status = "unreachable"
            adapter.last_checked = self._now()
            logger.warning(f"Health check failed for {adapter.framework_id}: {e}")

        return adapter

    async def refresh_all_adapters(self) -> None:
        """Refresh health information for all registered adapters."""
        if not self._adapters:
            logger.debug("No adapters registered for health check")
            return

        logger.debug(f"Checking health of {len(self._adapters)} adapters")

        # Check all adapters concurrently; the snapshot keeps the set of
        # checked adapters fixed even if the registry changes meanwhile.
        snapshot = list(self._adapters.values())
        await asyncio.gather(
            *(self.check_adapter_health(adapter) for adapter in snapshot),
            return_exceptions=True,
        )

        # Log summary
        healthy_count = sum(
            1 for adapter in self._adapters.values() if adapter.status == "healthy"
        )
        logger.info(
            f"Health check complete: {healthy_count}/{len(self._adapters)} adapters healthy"
        )

    async def auto_discover_from_config(self, config: dict[str, str]) -> None:
        """Auto-discover adapters from configuration.

        All configured URLs are probed concurrently so that one slow or
        unreachable adapter does not delay the others. Results are then
        registered in the order of ``config``. Adapters whose discovery fails
        are still registered, with placeholder metadata.

        Args:
            config: Dictionary mapping framework_id to URL
        """
        entries = list(config.items())
        if not entries:
            return

        for framework_id, url in entries:
            logger.info(f"Discovering adapter for {framework_id} at {url}")

        # discover_adapter() reports failure by returning None, so gather()
        # only propagates cancellation.
        discovered = await asyncio.gather(
            *(self.discover_adapter(url) for _, url in entries)
        )

        for (framework_id, url), adapter in zip(entries, discovered):
            if adapter is not None:
                self._adapters[url] = adapter
            else:
                # Still register even if discovery fails
                self.register_adapter(url, framework_id)

    def get_adapters(
        self, status: str | None = None, framework_id: str | None = None
    ) -> list[AdapterEndpoint]:
        """Get list of registered adapters.

        Args:
            status: Filter by status ("healthy", "unhealthy", "unreachable");
                a falsy value disables the filter
            framework_id: Filter by framework ID; a falsy value disables the
                filter

        Returns:
            List[AdapterEndpoint]: Matching adapters
        """
        return [
            adapter
            for adapter in self._adapters.values()
            if (not status or adapter.status == status)
            and (not framework_id or adapter.framework_id == framework_id)
        ]

    def get_adapter_for_framework(self, framework_id: str) -> AdapterEndpoint | None:
        """Get a healthy adapter for a specific framework.

        Args:
            framework_id: The framework identifier

        Returns:
            AdapterEndpoint: First healthy adapter in registration order, or
                None if not available
        """
        return next(
            (
                adapter
                for adapter in self._adapters.values()
                if adapter.framework_id == framework_id
                and adapter.status == "healthy"
            ),
            None,
        )

    def get_healthy_adapters(self) -> list[AdapterEndpoint]:
        """Get all healthy adapters.

        Returns:
            List[AdapterEndpoint]: All healthy adapters
        """
        return [a for a in self._adapters.values() if a.status == "healthy"]

    async def start_health_monitoring(self, interval: float | None = None) -> None:
        """Start continuous health monitoring of adapters.

        Does nothing (besides logging a warning) when monitoring is already
        running.

        Args:
            interval: Health check interval in seconds; a falsy value keeps
                the currently configured interval
        """
        if self._running:
            logger.warning("Health monitoring is already running")
            return

        if interval:
            self._check_interval = interval

        self._running = True
        self._health_check_task = asyncio.create_task(self._health_monitor_loop())
        logger.info(f"Started health monitoring (interval: {self._check_interval}s)")

    async def stop_health_monitoring(self) -> None:
        """Stop health monitoring and wait for the background task to end."""
        self._running = False

        task = self._health_check_task
        if task:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass
            self._health_check_task = None

        logger.info("Stopped health monitoring")

    async def _health_monitor_loop(self) -> None:
        """Background loop for health monitoring."""
        while self._running:
            try:
                await self.refresh_all_adapters()
                await asyncio.sleep(self._check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.exception(f"Error in health monitoring loop: {e}")
                await asyncio.sleep(5.0)  # Short delay before retrying

    async def shutdown(self) -> None:
        """Shutdown the discovery service and forget all adapters."""
        await self.stop_health_monitoring()
        self._adapters.clear()
        logger.info("Discovery service shut down")
@@ -0,0 +1,9 @@
1
+ """Adapter-specific models and base classes."""
2
+
3
+ from .framework import AdapterConfig, AdapterMetadata, FrameworkAdapter
4
+
5
+ __all__ = [
6
+ "AdapterConfig",
7
+ "AdapterMetadata",
8
+ "FrameworkAdapter",
9
+ ]