isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,579 +0,0 @@
1
- """
2
- Evaluation API Routes
3
-
4
- Provides comprehensive evaluation capabilities for AI models including
5
- benchmark testing, performance analysis, and comparison metrics.
6
- """
7
-
8
- from fastapi import APIRouter, Query, HTTPException, Depends, BackgroundTasks
9
- from fastapi.responses import StreamingResponse
10
- from pydantic import BaseModel, Field
11
- from typing import Optional, List, Dict, Any, Union
12
- import logging
13
- from datetime import datetime, timedelta
14
- import asyncpg
15
- import asyncio
16
- import json
17
- import os
18
- import uuid
19
- from enum import Enum
20
-
21
logger = logging.getLogger(__name__)

try:
    from ..middleware.auth import require_read_access, require_write_access
except ImportError:
    # The auth middleware could not be imported (development/testing layout),
    # so fall back to stub dependencies that accept every caller.  This turns
    # off access control for any route depending on them, which is why the
    # fallback is logged instead of being taken silently.
    logger.warning(
        "Auth middleware could not be imported; evaluation routes are using "
        "unauthenticated test stubs"
    )

    def require_read_access():
        """Stub read-access dependency that returns a fixed test user."""
        return {"user_id": "test_user"}

    def require_write_access():
        """Stub write-access dependency that returns a fixed test user."""
        return {"user_id": "test_user"}

router = APIRouter()

# Database connection configuration.  The default points at 127.0.0.1:54322
# and selects the ``dev`` schema through ``search_path``; set the DATABASE_URL
# environment variable to override it.
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@127.0.0.1:54322/postgres?options=-c%20search_path%3Ddev")
37
-
38
- # Enums
39
class TaskStatus(str, Enum):
    """Lifecycle states of an evaluation task.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value.
    """

    # Not yet started.
    PENDING = "pending"
    # Currently executing.
    RUNNING = "running"

    # Terminal states: the module treats these three as "finished".
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
45
-
46
class EvaluationPriority(int, Enum):
    """Scheduling priority of an evaluation task.

    Members are also ``int`` instances; a larger number is a higher priority
    level by name (LOW < MEDIUM < HIGH).
    """

    LOW = 1
    MEDIUM = 5
    HIGH = 10
50
-
51
- # Request Models
52
class EvaluationRequest(BaseModel):
    """Request body describing a single evaluation task.

    The ``description`` strings are runtime metadata and are kept verbatim;
    the comments give their English meaning.
    """

    # Evaluation task name (1-255 characters).
    name: str = Field(..., description="评估任务名称", min_length=1, max_length=255)
    # Models to evaluate; at least one is required.
    models: List[str] = Field(..., description="待评估模型列表", min_items=1)
    # Benchmark name.
    benchmark: str = Field(..., description="基准测试名称")
    # Dataset name (optional).
    dataset: Optional[str] = Field(None, description="数据集名称")
    # Evaluation configuration parameters (optional).
    config: Optional[Dict[str, Any]] = Field(None, description="评估配置参数")
    # Task priority; defaults to MEDIUM.
    priority: EvaluationPriority = Field(EvaluationPriority.MEDIUM, description="任务优先级")
    # Timeout in minutes, constrained to 5..1440; defaults to 60.
    timeout_minutes: Optional[int] = Field(60, description="超时时间(分钟)", ge=5, le=1440)
60
-
61
class BatchEvaluationRequest(BaseModel):
    """Request body for creating several evaluation tasks at once.

    The ``description`` strings are runtime metadata and are kept verbatim;
    the comments give their English meaning.
    """

    # Prefix for the generated task names.
    name_prefix: str = Field(..., description="批量任务名称前缀")
    # Models to evaluate; at least one is required.
    models: List[str] = Field(..., description="待评估模型列表", min_items=1)
    # Benchmarks to run; at least one is required.
    benchmarks: List[str] = Field(..., description="基准测试列表", min_items=1)
    # Evaluation configuration shared by every generated task (optional).
    config: Optional[Dict[str, Any]] = Field(None, description="通用评估配置")
    # Task priority; defaults to MEDIUM.
    priority: EvaluationPriority = Field(EvaluationPriority.MEDIUM, description="任务优先级")
67
-
68
- # Response Models
69
class EvaluationResponse(BaseModel):
    """Acknowledgement returned when an evaluation task has been created."""

    success: bool
    # Identifier of the created task.
    task_id: str
    status: TaskStatus
    message: Optional[str] = None
    estimated_time_minutes: Optional[int] = None
75
-
76
class EvaluationStatusResponse(BaseModel):
    """Progress snapshot of a single evaluation task."""

    # Identity and definition of the task.
    task_id: str
    name: str
    status: TaskStatus
    models: List[str]
    benchmark: str

    # Progress information.  The description string is runtime metadata and
    # means "completion progress (0.0-1.0)".
    progress: float = Field(0.0, description="完成进度 (0.0-1.0)")
    current_model: Optional[str] = None

    # Timestamps.
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    estimated_completion: Optional[datetime] = None

    error_message: Optional[str] = None
89
-
90
class ModelResult(BaseModel):
    """Evaluation outcome for one model within a task."""

    model_name: str
    # Metric name -> numeric score.
    metrics: Dict[str, float]
    # Optional raw outputs kept alongside the metrics.
    raw_results: Optional[List[Any]] = None
    execution_time_seconds: float
    status: str
96
-
97
class EvaluationResult(BaseModel):
    """Full result payload of an evaluation task, including per-model results."""

    # Identity and definition of the task.
    task_id: str
    name: str
    status: TaskStatus
    models: List[ModelResult]
    benchmark: str
    dataset: Optional[str] = None

    # Aggregated figures computed over the per-model metrics.
    summary: Dict[str, Any]
    config: Optional[Dict[str, Any]] = None

    # Timestamps and timing.
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    total_execution_time_seconds: Optional[float] = None
110
-
111
class BenchmarkInfo(BaseModel):
    """Description of a benchmark that evaluations can be run against."""

    name: str
    description: str
    category: str
    # Names of the metrics the benchmark reports.
    metrics: List[str]
    # Optional schema describing the benchmark's configuration.
    config_schema: Optional[Dict[str, Any]] = None
117
-
118
- # Database connection helper
119
async def get_db_connection():
    """Open a new asyncpg connection to ``DATABASE_URL``.

    The caller owns the returned connection and is responsible for closing it.

    Raises:
        HTTPException: 500 if the connection cannot be established.  The
            original error is attached as ``__cause__`` so it is not lost.
    """
    try:
        return await asyncpg.connect(DATABASE_URL)
    except Exception as e:
        logger.error(f"Database connection failed: {e}")
        raise HTTPException(status_code=500, detail="Database connection failed") from e
126
-
127
- # Task Management Functions
128
async def create_task_record(task_id: str, request: EvaluationRequest) -> None:
    """Insert a PENDING row for *request* into the ``evaluations`` table.

    Args:
        task_id: Identifier stored in the ``id`` column.
        request: Source of the remaining column values.
    """
    insert_sql = """
        INSERT INTO evaluations (id, name, status, models, benchmark, dataset, config, priority, timeout_minutes, created_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
    """
    conn = await get_db_connection()
    try:
        # The config dict is stored as JSON text; an empty or missing config
        # is stored as NULL.
        config_json = None
        if request.config:
            config_json = json.dumps(request.config)

        row_values = (
            task_id,
            request.name,
            TaskStatus.PENDING.value,
            request.models,
            request.benchmark,
            request.dataset,
            config_json,
            request.priority,
            request.timeout_minutes,
            datetime.utcnow(),
        )
        await conn.execute(insert_sql, *row_values)
    finally:
        await conn.close()
140
-
141
def _build_status_update(status, progress, current_model, error_message, now):
    """Build the parameterised UPDATE for a status change.

    Returns ``(query, values)`` where ``values`` are the arguments for the
    placeholders ``$2`` onwards; ``$1`` is reserved for the task id.
    """
    assignments = ["status = $2"]
    values = [status.value]

    def bind(template, value):
        # After appending, len(values) + 1 is this value's placeholder index
        # (the +1 accounts for the task id occupying $1).
        values.append(value)
        assignments.append(template.format(n=len(values) + 1))

    if progress is not None:
        bind("progress = ${n}", progress)
    if current_model is not None:
        bind("current_model = ${n}", current_model)
    if error_message is not None:
        bind("error_message = ${n}", error_message)

    if status == TaskStatus.RUNNING:
        # Progress updates are also sent with status RUNNING, so only fill
        # started_at when it is still NULL; otherwise every progress update
        # would reset the start time.
        bind("started_at = COALESCE(started_at, ${n})", now)
    elif status in (TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED):
        bind("completed_at = ${n}", now)

    query = f"UPDATE evaluations SET {', '.join(assignments)} WHERE id = $1"
    return query, values


async def update_task_status(task_id: str, status: TaskStatus,
                             progress: Optional[float] = None,
                             current_model: Optional[str] = None,
                             error_message: Optional[str] = None) -> None:
    """Update the status (and optional progress fields) of an evaluation task.

    Args:
        task_id: Id of the ``evaluations`` row to update.
        status: New status; RUNNING stamps ``started_at`` (first time only),
            a terminal status stamps ``completed_at``.
        progress: New progress value, left untouched when ``None``.
        current_model: Model currently being evaluated, left untouched when
            ``None``.
        error_message: Error text to store, left untouched when ``None``.
    """
    conn = await get_db_connection()
    try:
        query, values = _build_status_update(
            status, progress, current_model, error_message, datetime.utcnow()
        )
        await conn.execute(query, task_id, *values)
    finally:
        await conn.close()
180
-
181
async def get_task_status(task_id: str) -> Optional[Dict[str, Any]]:
    """Fetch one evaluation task as a plain dict.

    Returns:
        ``None`` when no row has the given id; otherwise a dict with the
        task's fields, where ``status`` is the raw value stored in the row.
    """
    conn = await get_db_connection()
    try:
        row = await conn.fetchrow("""
            SELECT * FROM evaluations WHERE id = $1
        """, task_id)

        if not row:
            return None

        # Columns read with ``get`` may be absent from the row; the others
        # are required.  Key order is kept stable for serialised output.
        snapshot = {
            'task_id': str(row['id']),
            'name': row['name'],
            'status': row['status'],
            'models': row['models'],
            'benchmark': row['benchmark'],
            'dataset': row['dataset'],
        }
        snapshot['progress'] = row.get('progress', 0.0)
        snapshot['current_model'] = row.get('current_model')
        snapshot['created_at'] = row['created_at']
        for optional_column in ('started_at', 'completed_at',
                                'estimated_completion', 'error_message'):
            snapshot[optional_column] = row.get(optional_column)
        return snapshot
    finally:
        await conn.close()
209
-
210
def generate_task_id() -> str:
    """Return a fresh random (version 4) UUID in its canonical string form."""
    new_id = uuid.uuid4()
    return str(new_id)
213
-
214
- # Background task functions
215
async def _is_task_cancelled(task_id: str) -> bool:
    """Return True when the stored status of *task_id* is CANCELLED."""
    record = await get_task_status(task_id)
    return record is not None and record['status'] == TaskStatus.CANCELLED.value


async def run_evaluation_task(task_id: str, request: EvaluationRequest):
    """Run an evaluation task in the background.

    The evaluation itself is still a mock: each model sleeps for two seconds
    and receives random metrics.  Progress is written to the database before
    and after each model, results are saved at the end, and any failure is
    recorded as FAILED.

    The stored status is re-read before each model and before the results
    are saved, so a task cancelled through the API stops instead of being
    flipped back to RUNNING/COMPLETED.  A cancellation that lands between a
    check and the following update can still be overwritten.
    """
    import random

    try:
        logger.info(f"Starting evaluation task {task_id}: {request.name}")

        # The task may already have been cancelled while it was still pending.
        if await _is_task_cancelled(task_id):
            logger.info(f"Evaluation task {task_id} was cancelled before it started")
            return

        await update_task_status(task_id, TaskStatus.RUNNING)

        # For now, create a mock evaluation for testing
        total_models = len(request.models)
        results = []

        for i, model in enumerate(request.models):
            if await _is_task_cancelled(task_id):
                logger.info(f"Evaluation task {task_id} was cancelled; stopping")
                return

            logger.info(f"Evaluating model {model} ({i+1}/{total_models})")
            await update_task_status(task_id, TaskStatus.RUNNING,
                                     progress=i/total_models, current_model=model)

            # Simulate evaluation time
            await asyncio.sleep(2)

            # Mock evaluation results
            model_result = {
                'model_name': model,
                'metrics': {
                    'accuracy': round(random.uniform(0.6, 0.95), 4),
                    'f1_score': round(random.uniform(0.55, 0.92), 4),
                    'overall_score': round(random.uniform(0.6, 0.9), 4)
                },
                'raw_results': [f"sample_prediction_{j}" for j in range(5)],  # Mock predictions
                'execution_time_seconds': round(random.uniform(1.5, 4.0), 2)
            }
            results.append(model_result)

            # The last model's sleep is a window for cancellation too, and
            # this update would otherwise overwrite CANCELLED with RUNNING.
            if await _is_task_cancelled(task_id):
                logger.info(f"Evaluation task {task_id} was cancelled; stopping")
                return

            # Update progress
            await update_task_status(task_id, TaskStatus.RUNNING,
                                     progress=(i+1)/total_models, current_model=model)

        if await _is_task_cancelled(task_id):
            logger.info(f"Evaluation task {task_id} was cancelled; discarding results")
            return

        # Save final results
        await save_evaluation_results(task_id, results)
        await update_task_status(task_id, TaskStatus.COMPLETED, progress=1.0)

        logger.info(f"Completed evaluation task {task_id}")

    except Exception as e:
        logger.exception(f"Evaluation task {task_id} failed: {e}")
        try:
            await update_task_status(task_id, TaskStatus.FAILED, error_message=str(e))
        except Exception:
            # Recording the failure can itself fail (e.g. the database is
            # down); log it rather than let it escape the background task.
            logger.exception(f"Could not mark evaluation task {task_id} as failed")
261
-
262
async def save_evaluation_results(task_id: str, results: List[Dict[str, Any]]):
    """Store the per-model results of an evaluation task.

    All rows are inserted inside one transaction, so a failure part-way
    through leaves no partial result set behind.

    Args:
        task_id: Value stored in ``evaluation_id`` for every row.
        results: One dict per model with ``model_name``, ``metrics``,
            ``execution_time_seconds`` and optionally ``raw_results``.
    """
    insert_sql = """
        INSERT INTO evaluation_results (evaluation_id, model_name, metrics, raw_results, execution_time_seconds, created_at)
        VALUES ($1, $2, $3, $4, $5, $6)
    """
    conn = await get_db_connection()
    try:
        async with conn.transaction():
            for result in results:
                # Metrics and raw results are stored as JSON text.  Each row
                # gets its own timestamp, as before.
                await conn.execute(
                    insert_sql,
                    task_id,
                    result['model_name'],
                    json.dumps(result['metrics']),
                    json.dumps(result.get('raw_results')),
                    result['execution_time_seconds'],
                    datetime.utcnow(),
                )
    finally:
        await conn.close()
274
-
275
- # API Endpoints
276
-
277
@router.post("/", response_model=EvaluationResponse)
async def create_evaluation(
    request: EvaluationRequest,
    background_tasks: BackgroundTasks
):
    """Create a new evaluation task and schedule it to run in the background.

    Returns an ``EvaluationResponse`` with the new task id and PENDING status;
    ``estimated_time_minutes`` echoes the request's timeout.

    Raises:
        HTTPException: 500 when the task record cannot be created.
    """
    # NOTE(review): unlike the batch endpoint, this route declares no
    # write-access dependency -- confirm whether that is intentional.
    try:
        task_id = generate_task_id()

        # Create task record
        await create_task_record(task_id, request)

        # Start background evaluation
        background_tasks.add_task(run_evaluation_task, task_id, request)

        return EvaluationResponse(
            success=True,
            task_id=task_id,
            status=TaskStatus.PENDING,
            estimated_time_minutes=request.timeout_minutes
        )

    except HTTPException:
        # Already a well-formed HTTP error (e.g. from the database helper);
        # pass it through unchanged, as the other handlers do.
        raise
    except Exception as e:
        logger.error(f"Failed to create evaluation: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create evaluation: {str(e)}")
302
-
303
@router.post("/batch", response_model=List[EvaluationResponse])
async def create_batch_evaluation(
    request: BatchEvaluationRequest,
    background_tasks: BackgroundTasks,
    user: Dict = Depends(require_write_access)
):
    """Create one evaluation task per requested benchmark.

    Every task evaluates all of the request's models and is named
    ``{name_prefix}_{benchmark}_{position}`` (position is 1-based).

    Returns:
        One ``EvaluationResponse`` per created task, in benchmark order.

    Raises:
        HTTPException: 500 when a task record cannot be created.
    """
    try:
        responses = []

        for i, benchmark in enumerate(request.benchmarks):
            task_id = generate_task_id()
            eval_request = EvaluationRequest(
                name=f"{request.name_prefix}_{benchmark}_{i+1}",
                models=request.models,
                benchmark=benchmark,
                config=request.config,
                priority=request.priority
            )

            await create_task_record(task_id, eval_request)
            background_tasks.add_task(run_evaluation_task, task_id, eval_request)

            responses.append(EvaluationResponse(
                success=True,
                task_id=task_id,
                status=TaskStatus.PENDING
            ))

        return responses

    except HTTPException:
        # Already a well-formed HTTP error (e.g. from the database helper);
        # pass it through unchanged, as the other handlers do.
        raise
    except Exception as e:
        logger.error(f"Failed to create batch evaluation: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create batch evaluation: {str(e)}")
337
-
338
@router.get("/", response_model=List[EvaluationStatusResponse])
async def list_evaluations(
    status: Optional[TaskStatus] = None,
    limit: int = Query(default=50, ge=0, le=200),
    offset: int = Query(default=0, ge=0)
):
    """List evaluation tasks, newest first.

    Args:
        status: When given, only tasks with this status are returned.
        limit: Maximum number of tasks to return (0..200).  A negative value
            is now rejected by validation instead of reaching the database.
        offset: Number of tasks to skip.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        conn = await get_db_connection()
        try:
            query = "SELECT * FROM evaluations"
            params = []

            if status:
                query += " WHERE status = $1"
                params.append(status.value)

            # LIMIT/OFFSET placeholders follow whatever was bound above.
            limit_pos = len(params) + 1
            offset_pos = len(params) + 2
            query += f" ORDER BY created_at DESC LIMIT ${limit_pos} OFFSET ${offset_pos}"
            params.extend([limit, offset])

            rows = await conn.fetch(query, *params)

            return [
                EvaluationStatusResponse(
                    task_id=str(row['id']),
                    name=row['name'],
                    status=TaskStatus(row['status']),
                    models=row['models'],
                    benchmark=row['benchmark'],
                    progress=row.get('progress', 0.0),
                    current_model=row.get('current_model'),
                    created_at=row['created_at'],
                    started_at=row.get('started_at'),
                    completed_at=row.get('completed_at'),
                    estimated_completion=row.get('estimated_completion'),
                    error_message=row.get('error_message')
                )
                for row in rows
            ]
        finally:
            await conn.close()

    except HTTPException:
        # Already a well-formed HTTP error (e.g. from the database helper);
        # pass it through unchanged, as the other handlers do.
        raise
    except Exception as e:
        logger.error(f"Failed to list evaluations: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list evaluations: {str(e)}")
383
-
384
@router.get("/{task_id}/status", response_model=EvaluationStatusResponse)
async def get_evaluation_status(
    task_id: str
):
    """Return the current status of one evaluation task.

    Raises:
        HTTPException: 404 when the task does not exist, 500 on any other
            failure.
    """
    try:
        snapshot = await get_task_status(task_id)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get evaluation status: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get evaluation status: {str(e)}")

    if not snapshot:
        raise HTTPException(status_code=404, detail="Task not found")

    try:
        return EvaluationStatusResponse(**snapshot)
    except Exception as e:
        logger.error(f"Failed to get evaluation status: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get evaluation status: {str(e)}")
401
-
402
def _summarise_metrics(metric_dicts):
    """Return avg/max/min per metric name across *metric_dicts*.

    A metric contributes only the values of the dicts that contain it; an
    empty input yields an empty summary.
    """
    metric_names = set()
    for metrics in metric_dicts:
        metric_names.update(metrics.keys())

    summary = {}
    for metric in metric_names:
        values = [m[metric] for m in metric_dicts if metric in m]
        if not values:
            continue
        summary[f"avg_{metric}"] = sum(values) / len(values)
        summary[f"max_{metric}"] = max(values)
        summary[f"min_{metric}"] = min(values)
    return summary


@router.get("/{task_id}/results", response_model=EvaluationResult)
async def get_evaluation_results(
    task_id: str
):
    """Return the stored per-model results of a task plus a metric summary.

    Raises:
        HTTPException: 404 when the task does not exist, 500 on any other
            failure.
    """
    try:
        # Get task info
        task = await get_task_status(task_id)
        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

        # Get results
        conn = await get_db_connection()
        try:
            rows = await conn.fetch("""
                SELECT * FROM evaluation_results WHERE evaluation_id = $1 ORDER BY created_at
            """, task_id)

            model_results = []
            for row in rows:
                raw = row['raw_results']
                model_results.append(ModelResult(
                    model_name=row['model_name'],
                    metrics=json.loads(row['metrics']),
                    raw_results=json.loads(raw) if raw else None,
                    execution_time_seconds=float(row['execution_time_seconds']),
                    status="completed"
                ))

            # Calculate summary
            summary = _summarise_metrics([r.metrics for r in model_results])

            return EvaluationResult(
                task_id=task_id,
                name=task['name'],
                status=TaskStatus(task['status']),
                models=model_results,
                benchmark=task['benchmark'],
                dataset=task.get('dataset'),
                summary=summary,
                created_at=task['created_at'],
                started_at=task.get('started_at'),
                completed_at=task.get('completed_at')
            )
        finally:
            await conn.close()

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get evaluation results: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get evaluation results: {str(e)}")
467
-
468
@router.post("/{task_id}/cancel")
async def cancel_evaluation(
    task_id: str
):
    """Mark an evaluation task as cancelled.

    Only tasks that are not yet in a terminal state (completed, failed or
    cancelled) can be cancelled.

    Raises:
        HTTPException: 404 when the task does not exist, 400 when it is
            already in a terminal state, 500 on any other failure.
    """
    try:
        status = await get_task_status(task_id)
        if not status:
            raise HTTPException(status_code=404, detail="Task not found")

        current_status = TaskStatus(status['status'])
        if current_status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
            # Use .value explicitly: formatting a str-mixin Enum member in an
            # f-string yields different text on different Python versions.
            raise HTTPException(
                status_code=400,
                detail=f"Cannot cancel task with status: {current_status.value}"
            )

        await update_task_status(task_id, TaskStatus.CANCELLED)

        return {"success": True, "message": "Task cancelled successfully"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to cancel evaluation: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to cancel evaluation: {str(e)}")
491
-
492
def _sse_json_default(value):
    """``json.dumps`` fallback: encode datetimes as ISO-8601 strings."""
    if isinstance(value, datetime):
        return value.isoformat()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


@router.get("/{task_id}/stream")
async def stream_evaluation_progress(
    task_id: str
):
    """Stream evaluation progress as server-sent events.

    The task is polled every two seconds and an event is emitted whenever its
    status snapshot changes.  The stream ends when the task reaches a
    terminal state, when the task cannot be found, or after an error (an
    ``{"error": ...}`` event is sent first in the last two cases).
    """

    async def generate():
        """Generate SSE stream for evaluation progress"""
        last_status = None

        while True:
            try:
                current_status = await get_task_status(task_id)
                if not current_status:
                    yield f"data: {json.dumps({'error': 'Task not found'})}\n\n"
                    break

                # Only send update if status changed
                if current_status != last_status:
                    # The snapshot carries timestamp fields (created_at etc.)
                    # that plain json.dumps cannot encode, so supply a
                    # fallback that handles datetimes.
                    payload = json.dumps(current_status, default=_sse_json_default)
                    yield f"data: {payload}\n\n"
                    last_status = current_status

                # Stop streaming if task is complete
                status_enum = TaskStatus(current_status['status'])
                if status_enum in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
                    break

                await asyncio.sleep(2)  # Update every 2 seconds

            except Exception as e:
                logger.error(f"Error in stream: {e}")
                yield f"data: {json.dumps({'error': str(e)})}\n\n"
                break

    # "text/event-stream" is the media type SSE clients expect for
    # "data: ...\n\n" framed messages.
    return StreamingResponse(generate(), media_type="text/event-stream")
527
-
528
def _decode_json_column(raw, expected_type, fallback):
    """Return *raw* as a decoded value.

    A value that already has ``expected_type`` is returned as is; any other
    truthy value is parsed as JSON; a falsy value yields *fallback*.
    """
    if isinstance(raw, expected_type):
        return raw
    return json.loads(raw) if raw else fallback


@router.get("/benchmarks", response_model=List[BenchmarkInfo])
async def list_benchmarks(
    category: Optional[str] = None
):
    """List the available benchmarks, ordered by category and name.

    Args:
        category: When given, only benchmarks of this category are returned.

    Raises:
        HTTPException: 500 when the benchmarks cannot be loaded.
    """
    try:
        # Get benchmarks from database
        conn = await get_db_connection()
        try:
            rows = await conn.fetch("SELECT * FROM dev.benchmarks ORDER BY category, name")
            benchmarks = [
                BenchmarkInfo(
                    name=row['name'],
                    description=row['description'],
                    category=row['category'],
                    metrics=_decode_json_column(row['metrics'], list, []),
                    config_schema=_decode_json_column(row['config_schema'], dict, None)
                )
                for row in rows
            ]
        finally:
            await conn.close()

        if category:
            benchmarks = [b for b in benchmarks if b.category == category]

        return benchmarks

    except HTTPException:
        # Already a well-formed HTTP error (e.g. from the database helper);
        # pass it through unchanged, as the other handlers do.
        raise
    except Exception as e:
        logger.error(f"Failed to list benchmarks: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list benchmarks: {str(e)}")
559
-
560
@router.get("/models")
async def list_evaluatable_models():
    """List models available for evaluation.

    The list is currently hard-coded; it is meant to be replaced by a lookup
    in the model registry.  Returning a literal cannot fail, so the former
    try/except wrapper (whose handler was unreachable) has been dropped.
    """
    # This would integrate with your model registry
    # For now, return common models
    return {
        "success": True,
        "models": [
            {"name": "gpt-4", "provider": "openai", "type": "llm"},
            {"name": "gpt-3.5-turbo", "provider": "openai", "type": "llm"},
            {"name": "claude-3-opus", "provider": "anthropic", "type": "llm"},
            {"name": "claude-3-sonnet", "provider": "anthropic", "type": "llm"},
            {"name": "llama-2-70b", "provider": "meta", "type": "llm"},
        ]
    }