isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
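The rename entries above imply import-path changes for any code that loads the Modal service modules directly. The snippet below is a hypothetical compatibility shim derived solely from the file paths listed; whether these modules are intended to be imported this way is not confirmed by this diff.

# Hypothetical shim for code written against the 0.4.0 layout. Module
# paths are inferred from the rename entries above, not from package docs;
# the OCR service is used as a representative example.
try:
    # 0.4.4 layout: isa_model/deployment/modal/services/vision/...
    from isa_model.deployment.modal.services.vision import isa_vision_ocr_service
except ImportError:
    # 0.4.0 layout: isa_model/deployment/cloud/modal/...
    from isa_model.deployment.cloud.modal import isa_vision_ocr_service

The expanded hunk below shows the largest removal, isa_model/serving/api/routes/evaluations.py (-579 lines).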
@@ -1,579 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Evaluation API Routes
|
3
|
-
|
4
|
-
Provides comprehensive evaluation capabilities for AI models including
|
5
|
-
benchmark testing, performance analysis, and comparison metrics.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from fastapi import APIRouter, Query, HTTPException, Depends, BackgroundTasks
|
9
|
-
from fastapi.responses import StreamingResponse
|
10
|
-
from pydantic import BaseModel, Field
|
11
|
-
from typing import Optional, List, Dict, Any, Union
|
12
|
-
import logging
|
13
|
-
from datetime import datetime, timedelta
|
14
|
-
import asyncpg
|
15
|
-
import asyncio
|
16
|
-
import json
|
17
|
-
import os
|
18
|
-
import uuid
|
19
|
-
from enum import Enum
|
20
|
-
|
21
|
-
try:
|
22
|
-
from ..middleware.auth import require_read_access, require_write_access
|
23
|
-
except ImportError:
|
24
|
-
# For development/testing when auth is not required
|
25
|
-
def require_read_access():
|
26
|
-
return {"user_id": "test_user"}
|
27
|
-
|
28
|
-
def require_write_access():
|
29
|
-
return {"user_id": "test_user"}
|
30
|
-
|
31
|
-
logger = logging.getLogger(__name__)
|
32
|
-
|
33
|
-
router = APIRouter()
|
34
|
-
|
35
|
-
# Database connection configuration
|
36
|
-
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@127.0.0.1:54322/postgres?options=-c%20search_path%3Ddev")
|
37
|
-
|
38
|
-
# Enums
|
39
|
-
class TaskStatus(str, Enum):
|
40
|
-
PENDING = "pending"
|
41
|
-
RUNNING = "running"
|
42
|
-
COMPLETED = "completed"
|
43
|
-
FAILED = "failed"
|
44
|
-
CANCELLED = "cancelled"
|
45
|
-
|
46
|
-
class EvaluationPriority(int, Enum):
|
47
|
-
LOW = 1
|
48
|
-
MEDIUM = 5
|
49
|
-
HIGH = 10
|
50
|
-
|
51
|
-
# Request Models
|
52
|
-
class EvaluationRequest(BaseModel):
|
53
|
-
name: str = Field(..., description="评估任务名称", min_length=1, max_length=255)
|
54
|
-
models: List[str] = Field(..., description="待评估模型列表", min_items=1)
|
55
|
-
benchmark: str = Field(..., description="基准测试名称")
|
56
|
-
dataset: Optional[str] = Field(None, description="数据集名称")
|
57
|
-
config: Optional[Dict[str, Any]] = Field(None, description="评估配置参数")
|
58
|
-
priority: EvaluationPriority = Field(EvaluationPriority.MEDIUM, description="任务优先级")
|
59
|
-
timeout_minutes: Optional[int] = Field(60, description="超时时间(分钟)", ge=5, le=1440)
|
60
|
-
|
61
|
-
class BatchEvaluationRequest(BaseModel):
|
62
|
-
name_prefix: str = Field(..., description="批量任务名称前缀")
|
63
|
-
models: List[str] = Field(..., description="待评估模型列表", min_items=1)
|
64
|
-
benchmarks: List[str] = Field(..., description="基准测试列表", min_items=1)
|
65
|
-
config: Optional[Dict[str, Any]] = Field(None, description="通用评估配置")
|
66
|
-
priority: EvaluationPriority = Field(EvaluationPriority.MEDIUM, description="任务优先级")
|
67
|
-
|
68
|
-
# Response Models
|
69
|
-
class EvaluationResponse(BaseModel):
|
70
|
-
success: bool
|
71
|
-
task_id: str
|
72
|
-
status: TaskStatus
|
73
|
-
message: Optional[str] = None
|
74
|
-
estimated_time_minutes: Optional[int] = None
|
75
|
-
|
76
|
-
class EvaluationStatusResponse(BaseModel):
|
77
|
-
task_id: str
|
78
|
-
name: str
|
79
|
-
status: TaskStatus
|
80
|
-
models: List[str]
|
81
|
-
benchmark: str
|
82
|
-
progress: float = Field(0.0, description="完成进度 (0.0-1.0)")
|
83
|
-
current_model: Optional[str] = None
|
84
|
-
created_at: datetime
|
85
|
-
started_at: Optional[datetime] = None
|
86
|
-
completed_at: Optional[datetime] = None
|
87
|
-
estimated_completion: Optional[datetime] = None
|
88
|
-
error_message: Optional[str] = None
|
89
|
-
|
90
|
-
class ModelResult(BaseModel):
|
91
|
-
model_name: str
|
92
|
-
metrics: Dict[str, float]
|
93
|
-
raw_results: Optional[List[Any]] = None
|
94
|
-
execution_time_seconds: float
|
95
|
-
status: str
|
96
|
-
|
97
|
-
class EvaluationResult(BaseModel):
|
98
|
-
task_id: str
|
99
|
-
name: str
|
100
|
-
status: TaskStatus
|
101
|
-
models: List[ModelResult]
|
102
|
-
benchmark: str
|
103
|
-
dataset: Optional[str] = None
|
104
|
-
summary: Dict[str, Any]
|
105
|
-
config: Optional[Dict[str, Any]] = None
|
106
|
-
created_at: datetime
|
107
|
-
started_at: Optional[datetime] = None
|
108
|
-
completed_at: Optional[datetime] = None
|
109
|
-
total_execution_time_seconds: Optional[float] = None
|
110
|
-
|
111
|
-
class BenchmarkInfo(BaseModel):
|
112
|
-
name: str
|
113
|
-
description: str
|
114
|
-
category: str
|
115
|
-
metrics: List[str]
|
116
|
-
config_schema: Optional[Dict[str, Any]] = None
|
117
|
-
|
118
|
-
# Database connection helper
|
119
|
-
async def get_db_connection():
|
120
|
-
"""Get database connection"""
|
121
|
-
try:
|
122
|
-
return await asyncpg.connect(DATABASE_URL)
|
123
|
-
except Exception as e:
|
124
|
-
logger.error(f"Database connection failed: {e}")
|
125
|
-
raise HTTPException(status_code=500, detail="Database connection failed")
|
126
|
-
|
127
|
-
# Task Management Functions
|
128
|
-
async def create_task_record(task_id: str, request: EvaluationRequest) -> None:
|
129
|
-
"""Create evaluation task record in database"""
|
130
|
-
conn = await get_db_connection()
|
131
|
-
try:
|
132
|
-
await conn.execute("""
|
133
|
-
INSERT INTO evaluations (id, name, status, models, benchmark, dataset, config, priority, timeout_minutes, created_at)
|
134
|
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
135
|
-
""", task_id, request.name, TaskStatus.PENDING.value, request.models,
|
136
|
-
request.benchmark, request.dataset, json.dumps(request.config) if request.config else None,
|
137
|
-
request.priority, request.timeout_minutes, datetime.utcnow())
|
138
|
-
finally:
|
139
|
-
await conn.close()
|
140
|
-
|
141
|
-
async def update_task_status(task_id: str, status: TaskStatus,
|
142
|
-
progress: Optional[float] = None,
|
143
|
-
current_model: Optional[str] = None,
|
144
|
-
error_message: Optional[str] = None) -> None:
|
145
|
-
"""Update evaluation task status"""
|
146
|
-
conn = await get_db_connection()
|
147
|
-
try:
|
148
|
-
updates = ["status = $2"]
|
149
|
-
params = [task_id, status.value]
|
150
|
-
param_count = 2
|
151
|
-
|
152
|
-
if progress is not None:
|
153
|
-
param_count += 1
|
154
|
-
updates.append(f"progress = ${param_count}")
|
155
|
-
params.append(progress)
|
156
|
-
|
157
|
-
if current_model is not None:
|
158
|
-
param_count += 1
|
159
|
-
updates.append(f"current_model = ${param_count}")
|
160
|
-
params.append(current_model)
|
161
|
-
|
162
|
-
if error_message is not None:
|
163
|
-
param_count += 1
|
164
|
-
updates.append(f"error_message = ${param_count}")
|
165
|
-
params.append(error_message)
|
166
|
-
|
167
|
-
if status == TaskStatus.RUNNING:
|
168
|
-
param_count += 1
|
169
|
-
updates.append(f"started_at = ${param_count}")
|
170
|
-
params.append(datetime.utcnow())
|
171
|
-
elif status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
|
172
|
-
param_count += 1
|
173
|
-
updates.append(f"completed_at = ${param_count}")
|
174
|
-
params.append(datetime.utcnow())
|
175
|
-
|
176
|
-
query = f"UPDATE evaluations SET {', '.join(updates)} WHERE id = $1"
|
177
|
-
await conn.execute(query, *params)
|
178
|
-
finally:
|
179
|
-
await conn.close()
|
180
|
-
|
181
|
-
async def get_task_status(task_id: str) -> Optional[Dict[str, Any]]:
|
182
|
-
"""Get evaluation task status"""
|
183
|
-
conn = await get_db_connection()
|
184
|
-
try:
|
185
|
-
result = await conn.fetchrow("""
|
186
|
-
SELECT * FROM evaluations WHERE id = $1
|
187
|
-
""", task_id)
|
188
|
-
|
189
|
-
if not result:
|
190
|
-
return None
|
191
|
-
|
192
|
-
return {
|
193
|
-
'task_id': str(result['id']),
|
194
|
-
'name': result['name'],
|
195
|
-
'status': result['status'],
|
196
|
-
'models': result['models'],
|
197
|
-
'benchmark': result['benchmark'],
|
198
|
-
'dataset': result['dataset'],
|
199
|
-
'progress': result.get('progress', 0.0),
|
200
|
-
'current_model': result.get('current_model'),
|
201
|
-
'created_at': result['created_at'],
|
202
|
-
'started_at': result.get('started_at'),
|
203
|
-
'completed_at': result.get('completed_at'),
|
204
|
-
'estimated_completion': result.get('estimated_completion'),
|
205
|
-
'error_message': result.get('error_message')
|
206
|
-
}
|
207
|
-
finally:
|
208
|
-
await conn.close()
|
209
|
-
|
210
|
-
def generate_task_id() -> str:
|
211
|
-
"""Generate unique task ID"""
|
212
|
-
return str(uuid.uuid4())
|
213
|
-
|
214
|
-
# Background task functions
|
215
|
-
async def run_evaluation_task(task_id: str, request: EvaluationRequest):
|
216
|
-
"""Run evaluation task in background"""
|
217
|
-
try:
|
218
|
-
logger.info(f"Starting evaluation task {task_id}: {request.name}")
|
219
|
-
await update_task_status(task_id, TaskStatus.RUNNING)
|
220
|
-
|
221
|
-
# For now, create a mock evaluation for testing
|
222
|
-
import random
|
223
|
-
|
224
|
-
total_models = len(request.models)
|
225
|
-
results = []
|
226
|
-
|
227
|
-
for i, model in enumerate(request.models):
|
228
|
-
logger.info(f"Evaluating model {model} ({i+1}/{total_models})")
|
229
|
-
await update_task_status(task_id, TaskStatus.RUNNING,
|
230
|
-
progress=i/total_models, current_model=model)
|
231
|
-
|
232
|
-
# Simulate evaluation time
|
233
|
-
await asyncio.sleep(2)
|
234
|
-
|
235
|
-
# Mock evaluation results
|
236
|
-
model_result = {
|
237
|
-
'model_name': model,
|
238
|
-
'metrics': {
|
239
|
-
'accuracy': round(random.uniform(0.6, 0.95), 4),
|
240
|
-
'f1_score': round(random.uniform(0.55, 0.92), 4),
|
241
|
-
'overall_score': round(random.uniform(0.6, 0.9), 4)
|
242
|
-
},
|
243
|
-
'raw_results': [f"sample_prediction_{j}" for j in range(5)], # Mock predictions
|
244
|
-
'execution_time_seconds': round(random.uniform(1.5, 4.0), 2)
|
245
|
-
}
|
246
|
-
results.append(model_result)
|
247
|
-
|
248
|
-
# Update progress
|
249
|
-
await update_task_status(task_id, TaskStatus.RUNNING,
|
250
|
-
progress=(i+1)/total_models, current_model=model)
|
251
|
-
|
252
|
-
# Save final results
|
253
|
-
await save_evaluation_results(task_id, results)
|
254
|
-
await update_task_status(task_id, TaskStatus.COMPLETED, progress=1.0)
|
255
|
-
|
256
|
-
logger.info(f"Completed evaluation task {task_id}")
|
257
|
-
|
258
|
-
except Exception as e:
|
259
|
-
logger.error(f"Evaluation task {task_id} failed: {e}")
|
260
|
-
await update_task_status(task_id, TaskStatus.FAILED, error_message=str(e))
|
261
|
-
|
262
|
-
async def save_evaluation_results(task_id: str, results: List[Dict[str, Any]]):
|
263
|
-
"""Save evaluation results to database"""
|
264
|
-
conn = await get_db_connection()
|
265
|
-
try:
|
266
|
-
for result in results:
|
267
|
-
await conn.execute("""
|
268
|
-
INSERT INTO evaluation_results (evaluation_id, model_name, metrics, raw_results, execution_time_seconds, created_at)
|
269
|
-
VALUES ($1, $2, $3, $4, $5, $6)
|
270
|
-
""", task_id, result['model_name'], json.dumps(result['metrics']),
|
271
|
-
json.dumps(result.get('raw_results')), result['execution_time_seconds'], datetime.utcnow())
|
272
|
-
finally:
|
273
|
-
await conn.close()
|
274
|
-
|
275
|
-
# API Endpoints
|
276
|
-
|
277
|
-
@router.post("/", response_model=EvaluationResponse)
|
278
|
-
async def create_evaluation(
|
279
|
-
request: EvaluationRequest,
|
280
|
-
background_tasks: BackgroundTasks
|
281
|
-
):
|
282
|
-
"""Create new evaluation task"""
|
283
|
-
try:
|
284
|
-
task_id = generate_task_id()
|
285
|
-
|
286
|
-
# Create task record
|
287
|
-
await create_task_record(task_id, request)
|
288
|
-
|
289
|
-
# Start background evaluation
|
290
|
-
background_tasks.add_task(run_evaluation_task, task_id, request)
|
291
|
-
|
292
|
-
return EvaluationResponse(
|
293
|
-
success=True,
|
294
|
-
task_id=task_id,
|
295
|
-
status=TaskStatus.PENDING,
|
296
|
-
estimated_time_minutes=request.timeout_minutes
|
297
|
-
)
|
298
|
-
|
299
|
-
except Exception as e:
|
300
|
-
logger.error(f"Failed to create evaluation: {e}")
|
301
|
-
raise HTTPException(status_code=500, detail=f"Failed to create evaluation: {str(e)}")
|
302
|
-
|
303
|
-
@router.post("/batch", response_model=List[EvaluationResponse])
|
304
|
-
async def create_batch_evaluation(
|
305
|
-
request: BatchEvaluationRequest,
|
306
|
-
background_tasks: BackgroundTasks,
|
307
|
-
user: Dict = Depends(require_write_access)
|
308
|
-
):
|
309
|
-
"""Create batch evaluation tasks"""
|
310
|
-
try:
|
311
|
-
responses = []
|
312
|
-
|
313
|
-
for i, benchmark in enumerate(request.benchmarks):
|
314
|
-
task_id = generate_task_id()
|
315
|
-
eval_request = EvaluationRequest(
|
316
|
-
name=f"{request.name_prefix}_{benchmark}_{i+1}",
|
317
|
-
models=request.models,
|
318
|
-
benchmark=benchmark,
|
319
|
-
config=request.config,
|
320
|
-
priority=request.priority
|
321
|
-
)
|
322
|
-
|
323
|
-
await create_task_record(task_id, eval_request)
|
324
|
-
background_tasks.add_task(run_evaluation_task, task_id, eval_request)
|
325
|
-
|
326
|
-
responses.append(EvaluationResponse(
|
327
|
-
success=True,
|
328
|
-
task_id=task_id,
|
329
|
-
status=TaskStatus.PENDING
|
330
|
-
))
|
331
|
-
|
332
|
-
return responses
|
333
|
-
|
334
|
-
except Exception as e:
|
335
|
-
logger.error(f"Failed to create batch evaluation: {e}")
|
336
|
-
raise HTTPException(status_code=500, detail=f"Failed to create batch evaluation: {str(e)}")
|
337
|
-
|
338
|
-
@router.get("/", response_model=List[EvaluationStatusResponse])
|
339
|
-
async def list_evaluations(
|
340
|
-
status: Optional[TaskStatus] = None,
|
341
|
-
limit: int = Query(default=50, le=200),
|
342
|
-
offset: int = Query(default=0, ge=0)
|
343
|
-
):
|
344
|
-
"""List evaluation tasks"""
|
345
|
-
try:
|
346
|
-
conn = await get_db_connection()
|
347
|
-
try:
|
348
|
-
query = "SELECT * FROM evaluations"
|
349
|
-
params = []
|
350
|
-
|
351
|
-
if status:
|
352
|
-
query += " WHERE status = $1"
|
353
|
-
params.append(status.value)
|
354
|
-
|
355
|
-
query += " ORDER BY created_at DESC LIMIT $" + str(len(params) + 1) + " OFFSET $" + str(len(params) + 2)
|
356
|
-
params.extend([limit, offset])
|
357
|
-
|
358
|
-
results = await conn.fetch(query, *params)
|
359
|
-
|
360
|
-
return [
|
361
|
-
EvaluationStatusResponse(
|
362
|
-
task_id=str(row['id']),
|
363
|
-
name=row['name'],
|
364
|
-
status=TaskStatus(row['status']),
|
365
|
-
models=row['models'],
|
366
|
-
benchmark=row['benchmark'],
|
367
|
-
progress=row.get('progress', 0.0),
|
368
|
-
current_model=row.get('current_model'),
|
369
|
-
created_at=row['created_at'],
|
370
|
-
started_at=row.get('started_at'),
|
371
|
-
completed_at=row.get('completed_at'),
|
372
|
-
estimated_completion=row.get('estimated_completion'),
|
373
|
-
error_message=row.get('error_message')
|
374
|
-
)
|
375
|
-
for row in results
|
376
|
-
]
|
377
|
-
finally:
|
378
|
-
await conn.close()
|
379
|
-
|
380
|
-
except Exception as e:
|
381
|
-
logger.error(f"Failed to list evaluations: {e}")
|
382
|
-
raise HTTPException(status_code=500, detail=f"Failed to list evaluations: {str(e)}")
|
383
|
-
|
384
|
-
@router.get("/{task_id}/status", response_model=EvaluationStatusResponse)
|
385
|
-
async def get_evaluation_status(
|
386
|
-
task_id: str
|
387
|
-
):
|
388
|
-
"""Get evaluation task status"""
|
389
|
-
try:
|
390
|
-
status = await get_task_status(task_id)
|
391
|
-
if not status:
|
392
|
-
raise HTTPException(status_code=404, detail="Task not found")
|
393
|
-
|
394
|
-
return EvaluationStatusResponse(**status)
|
395
|
-
|
396
|
-
except HTTPException:
|
397
|
-
raise
|
398
|
-
except Exception as e:
|
399
|
-
logger.error(f"Failed to get evaluation status: {e}")
|
400
|
-
raise HTTPException(status_code=500, detail=f"Failed to get evaluation status: {str(e)}")
|
401
|
-
|
402
|
-
@router.get("/{task_id}/results", response_model=EvaluationResult)
|
403
|
-
async def get_evaluation_results(
|
404
|
-
task_id: str
|
405
|
-
):
|
406
|
-
"""Get evaluation results"""
|
407
|
-
try:
|
408
|
-
# Get task info
|
409
|
-
status = await get_task_status(task_id)
|
410
|
-
if not status:
|
411
|
-
raise HTTPException(status_code=404, detail="Task not found")
|
412
|
-
|
413
|
-
# Get results
|
414
|
-
conn = await get_db_connection()
|
415
|
-
try:
|
416
|
-
results = await conn.fetch("""
|
417
|
-
SELECT * FROM evaluation_results WHERE evaluation_id = $1 ORDER BY created_at
|
418
|
-
""", task_id)
|
419
|
-
|
420
|
-
model_results = [
|
421
|
-
ModelResult(
|
422
|
-
model_name=row['model_name'],
|
423
|
-
metrics=json.loads(row['metrics']),
|
424
|
-
raw_results=json.loads(row['raw_results']) if row['raw_results'] else None,
|
425
|
-
execution_time_seconds=float(row['execution_time_seconds']),
|
426
|
-
status="completed"
|
427
|
-
)
|
428
|
-
for row in results
|
429
|
-
]
|
430
|
-
|
431
|
-
# Calculate summary
|
432
|
-
summary = {}
|
433
|
-
if model_results:
|
434
|
-
all_metrics = [r.metrics for r in model_results]
|
435
|
-
if all_metrics:
|
436
|
-
metric_names = set()
|
437
|
-
for metrics in all_metrics:
|
438
|
-
metric_names.update(metrics.keys())
|
439
|
-
|
440
|
-
for metric in metric_names:
|
441
|
-
values = [m.get(metric, 0) for m in all_metrics if metric in m]
|
442
|
-
if values:
|
443
|
-
summary[f"avg_{metric}"] = sum(values) / len(values)
|
444
|
-
summary[f"max_{metric}"] = max(values)
|
445
|
-
summary[f"min_{metric}"] = min(values)
|
446
|
-
|
447
|
-
return EvaluationResult(
|
448
|
-
task_id=task_id,
|
449
|
-
name=status['name'],
|
450
|
-
status=TaskStatus(status['status']),
|
451
|
-
models=model_results,
|
452
|
-
benchmark=status['benchmark'],
|
453
|
-
dataset=status.get('dataset'),
|
454
|
-
summary=summary,
|
455
|
-
created_at=status['created_at'],
|
456
|
-
started_at=status.get('started_at'),
|
457
|
-
completed_at=status.get('completed_at')
|
458
|
-
)
|
459
|
-
finally:
|
460
|
-
await conn.close()
|
461
|
-
|
462
|
-
except HTTPException:
|
463
|
-
raise
|
464
|
-
except Exception as e:
|
465
|
-
logger.error(f"Failed to get evaluation results: {e}")
|
466
|
-
raise HTTPException(status_code=500, detail=f"Failed to get evaluation results: {str(e)}")
|
467
|
-
|
468
|
-
@router.post("/{task_id}/cancel")
|
469
|
-
async def cancel_evaluation(
|
470
|
-
task_id: str
|
471
|
-
):
|
472
|
-
"""Cancel evaluation task"""
|
473
|
-
try:
|
474
|
-
status = await get_task_status(task_id)
|
475
|
-
if not status:
|
476
|
-
raise HTTPException(status_code=404, detail="Task not found")
|
477
|
-
|
478
|
-
current_status = TaskStatus(status['status'])
|
479
|
-
if current_status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
|
480
|
-
raise HTTPException(status_code=400, detail=f"Cannot cancel task with status: {current_status}")
|
481
|
-
|
482
|
-
await update_task_status(task_id, TaskStatus.CANCELLED)
|
483
|
-
|
484
|
-
return {"success": True, "message": "Task cancelled successfully"}
|
485
|
-
|
486
|
-
except HTTPException:
|
487
|
-
raise
|
488
|
-
except Exception as e:
|
489
|
-
logger.error(f"Failed to cancel evaluation: {e}")
|
490
|
-
raise HTTPException(status_code=500, detail=f"Failed to cancel evaluation: {str(e)}")
|
491
|
-
|
492
|
-
@router.get("/{task_id}/stream")
|
493
|
-
async def stream_evaluation_progress(
|
494
|
-
task_id: str
|
495
|
-
):
|
496
|
-
"""Stream evaluation progress in real-time"""
|
497
|
-
|
498
|
-
async def generate():
|
499
|
-
"""Generate SSE stream for evaluation progress"""
|
500
|
-
last_status = None
|
501
|
-
|
502
|
-
while True:
|
503
|
-
try:
|
504
|
-
current_status = await get_task_status(task_id)
|
505
|
-
if not current_status:
|
506
|
-
yield f"data: {json.dumps({'error': 'Task not found'})}\n\n"
|
507
|
-
break
|
508
|
-
|
509
|
-
# Only send update if status changed
|
510
|
-
if current_status != last_status:
|
511
|
-
yield f"data: {json.dumps(current_status)}\n\n"
|
512
|
-
last_status = current_status
|
513
|
-
|
514
|
-
# Stop streaming if task is complete
|
515
|
-
status_enum = TaskStatus(current_status['status'])
|
516
|
-
if status_enum in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
|
517
|
-
break
|
518
|
-
|
519
|
-
await asyncio.sleep(2) # Update every 2 seconds
|
520
|
-
|
521
|
-
except Exception as e:
|
522
|
-
logger.error(f"Error in stream: {e}")
|
523
|
-
yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
524
|
-
break
|
525
|
-
|
526
|
-
return StreamingResponse(generate(), media_type="text/plain")
|
527
|
-
|
528
|
-
@router.get("/benchmarks", response_model=List[BenchmarkInfo])
|
529
|
-
async def list_benchmarks(
|
530
|
-
category: Optional[str] = None
|
531
|
-
):
|
532
|
-
"""List available benchmarks"""
|
533
|
-
try:
|
534
|
-
# Get benchmarks from database
|
535
|
-
conn = await get_db_connection()
|
536
|
-
try:
|
537
|
-
results = await conn.fetch("SELECT * FROM dev.benchmarks ORDER BY category, name")
|
538
|
-
benchmarks = [
|
539
|
-
BenchmarkInfo(
|
540
|
-
name=row['name'],
|
541
|
-
description=row['description'],
|
542
|
-
category=row['category'],
|
543
|
-
metrics=row['metrics'] if isinstance(row['metrics'], list) else json.loads(row['metrics']) if row['metrics'] else [],
|
544
|
-
config_schema=row['config_schema'] if isinstance(row['config_schema'], dict) else json.loads(row['config_schema']) if row['config_schema'] else None
|
545
|
-
)
|
546
|
-
for row in results
|
547
|
-
]
|
548
|
-
finally:
|
549
|
-
await conn.close()
|
550
|
-
|
551
|
-
if category:
|
552
|
-
benchmarks = [b for b in benchmarks if b.category == category]
|
553
|
-
|
554
|
-
return benchmarks
|
555
|
-
|
556
|
-
except Exception as e:
|
557
|
-
logger.error(f"Failed to list benchmarks: {e}")
|
558
|
-
raise HTTPException(status_code=500, detail=f"Failed to list benchmarks: {str(e)}")
|
559
|
-
|
560
|
-
@router.get("/models")
|
561
|
-
async def list_evaluatable_models():
|
562
|
-
"""List models available for evaluation"""
|
563
|
-
try:
|
564
|
-
# This would integrate with your model registry
|
565
|
-
# For now, return common models
|
566
|
-
return {
|
567
|
-
"success": True,
|
568
|
-
"models": [
|
569
|
-
{"name": "gpt-4", "provider": "openai", "type": "llm"},
|
570
|
-
{"name": "gpt-3.5-turbo", "provider": "openai", "type": "llm"},
|
571
|
-
{"name": "claude-3-opus", "provider": "anthropic", "type": "llm"},
|
572
|
-
{"name": "claude-3-sonnet", "provider": "anthropic", "type": "llm"},
|
573
|
-
{"name": "llama-2-70b", "provider": "meta", "type": "llm"},
|
574
|
-
]
|
575
|
-
}
|
576
|
-
|
577
|
-
except Exception as e:
|
578
|
-
logger.error(f"Failed to list models: {e}")
|
579
|
-
raise HTTPException(status_code=500, detail=f"Failed to list models: {str(e)}")
|