isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff shows the content changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
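For consumers upgrading across the renames in the listing above (for example, the `cloud/modal` services moving under `modal/services/*` and `services/auto_hf_modal_deployer.py` becoming `modal/deployer.py`), the sketch below illustrates how import paths shift. Only the module paths come from the listing; the try/except fallback pattern is an illustrative assumption, and whether each module keeps the same public symbols is not confirmed by this diff.

```python
# Illustrative only: module paths follow the renames in the listing above;
# keeping identical module-level contents across the move is an assumption.
try:
    # isa-model 0.4.3 layout
    from isa_model.deployment.modal import deployer as modal_deployer
    from isa_model.deployment.modal.services.vision import isa_vision_ocr_service
except ImportError:
    # isa-model 0.4.0 layout
    from isa_model.deployment.services import auto_hf_modal_deployer as modal_deployer
    from isa_model.deployment.cloud.modal import isa_vision_ocr_service
```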
isa_model/inference/legacy_services/model_evaluation.py (new file)
@@ -0,0 +1,637 @@
"""
Model Evaluation Service - Step 2 of Model Pipeline
Handles model evaluation, validation, and performance assessment
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
import logging
from dataclasses import dataclass, field
from datetime import datetime

try:
    from sklearn.model_selection import cross_val_score, validation_curve, learning_curve
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    logging.warning("scikit-learn not available. Evaluation capabilities will be limited.")

logger = logging.getLogger(__name__)

@dataclass
class EvaluationResult:
    """Result of model evaluation step"""
    success: bool
    model_id: str
    evaluation_metrics: Dict[str, Any] = field(default_factory=dict)
    cross_validation_results: Dict[str, Any] = field(default_factory=dict)
    validation_analysis: Dict[str, Any] = field(default_factory=dict)
    performance_comparison: Dict[str, Any] = field(default_factory=dict)
    recommendations: List[str] = field(default_factory=list)
    performance_metrics: Dict[str, Any] = field(default_factory=dict)
    warnings: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)

class ModelEvaluationService:
    """
    Model Evaluation Service - Step 2 of Model Pipeline

    Handles:
    - Model performance evaluation using various metrics
    - Cross-validation and validation curve analysis
    - Model comparison and benchmarking
    - Performance diagnostics and recommendations
    """

    def __init__(self):
        self.execution_stats = {
            'total_evaluation_operations': 0,
            'successful_evaluation_operations': 0,
            'failed_evaluation_operations': 0,
            'models_evaluated': 0,
            'average_evaluation_time': 0.0
        }

        # Track evaluation results
        self.evaluation_results = {}

        logger.info("Model Evaluation Service initialized")

    def evaluate_model(self,
                       model_info: Dict[str, Any],
                       test_data: pd.DataFrame,
                       target_column: str,
                       evaluation_config: Optional[Dict[str, Any]] = None) -> EvaluationResult:
        """
        Evaluate a trained model's performance

        Args:
            model_info: Information about the trained model
            test_data: Test dataset for evaluation
            target_column: Target variable column name
            evaluation_config: Configuration for evaluation

        Returns:
            EvaluationResult with comprehensive evaluation metrics
        """
        start_time = datetime.now()
        evaluation_config = evaluation_config or {}

        try:
            model_id = model_info.get('model_id', 'unknown')
            logger.info(f"Starting model evaluation for: {model_id}")

            # Initialize result
            result = EvaluationResult(
                success=False,
                model_id=model_id
            )

            # Validate inputs
            validation_result = self._validate_evaluation_inputs(model_info, test_data, target_column)
            if not validation_result['valid']:
                result.errors.extend(validation_result['errors'])
                return self._finalize_evaluation_result(result, start_time)

            # Extract model and processor
            processor = model_info.get('processor')
            model_instance = model_info.get('model_instance')
            problem_type = model_info.get('problem_type', 'classification')

            if not processor:
                result.errors.append("Model processor not available")
                return self._finalize_evaluation_result(result, start_time)

            # Prepare test data
            X_test = test_data.drop(columns=[target_column])
            y_test = test_data[target_column]

            # Basic evaluation metrics
            basic_metrics = self._calculate_basic_metrics(
                processor, model_instance, X_test, y_test, problem_type
            )

            if not basic_metrics['success']:
                result.errors.extend(basic_metrics['errors'])
                return self._finalize_evaluation_result(result, start_time)

            result.evaluation_metrics = basic_metrics['metrics']

            # Cross-validation analysis
            if evaluation_config.get('perform_cv', True) and SKLEARN_AVAILABLE:
                cv_results = self._perform_cross_validation(
                    model_info, test_data, target_column, evaluation_config
                )
                result.cross_validation_results = cv_results

            # Validation curve analysis
            if evaluation_config.get('validation_curves', False) and SKLEARN_AVAILABLE:
                validation_analysis = self._analyze_validation_curves(
                    model_info, test_data, target_column, evaluation_config
                )
                result.validation_analysis = validation_analysis

            # Performance diagnostics
            diagnostics = self._diagnose_model_performance(
                result.evaluation_metrics, problem_type, model_info
            )
            result.recommendations = diagnostics['recommendations']
            result.warnings.extend(diagnostics['warnings'])

            # Success
            result.success = True
            self.evaluation_results[model_id] = result

            return self._finalize_evaluation_result(result, start_time)

        except Exception as e:
            logger.error(f"Model evaluation failed: {e}")
            result.errors.append(f"Evaluation error: {str(e)}")
            return self._finalize_evaluation_result(result, start_time)

    def compare_models(self,
                       model_infos: List[Dict[str, Any]],
                       test_data: pd.DataFrame,
                       target_column: str,
                       comparison_metrics: Optional[List[str]] = None) -> Dict[str, Any]:
        """Compare multiple models on the same test dataset"""
        try:
            comparison_metrics = comparison_metrics or ['accuracy', 'f1_score', 'r2_score']
            comparison_results = {
                'model_comparison': {},
                'ranking': {},
                'best_model': None,
                'comparison_summary': {}
            }

            model_performances = {}

            for model_info in model_infos:
                model_id = model_info.get('model_id', 'unknown')

                try:
                    evaluation_result = self.evaluate_model(
                        model_info, test_data, target_column, {'perform_cv': False}
                    )

                    if evaluation_result.success:
                        model_performances[model_id] = {
                            'metrics': evaluation_result.evaluation_metrics,
                            'algorithm': model_info.get('training_config', {}).get('algorithm'),
                            'problem_type': model_info.get('problem_type')
                        }
                        comparison_results['model_comparison'][model_id] = evaluation_result.evaluation_metrics
                    else:
                        logger.warning(f"Evaluation failed for model {model_id}")

                except Exception as e:
                    logger.error(f"Error evaluating model {model_id}: {e}")

            # Rank models by performance
            if model_performances:
                rankings = self._rank_models_by_performance(model_performances, comparison_metrics)
                comparison_results['ranking'] = rankings

                if rankings:
                    best_model_id = rankings[0]['model_id']
                    comparison_results['best_model'] = {
                        'model_id': best_model_id,
                        'metrics': model_performances[best_model_id]['metrics'],
                        'algorithm': model_performances[best_model_id]['algorithm']
                    }

            # Generate comparison summary
            comparison_results['comparison_summary'] = self._generate_comparison_summary(
                model_performances, comparison_metrics
            )

            return comparison_results

        except Exception as e:
            logger.error(f"Model comparison failed: {e}")
            return {'error': str(e)}

    def analyze_model_performance(self,
                                  model_id: str,
                                  detailed_analysis: bool = True) -> Dict[str, Any]:
        """Perform detailed performance analysis for a specific model"""
        try:
            if model_id not in self.evaluation_results:
                return {'error': f'No evaluation results found for model {model_id}'}

            result = self.evaluation_results[model_id]

            analysis = {
                'model_id': model_id,
                'basic_performance': result.evaluation_metrics,
                'cross_validation': result.cross_validation_results,
                'recommendations': result.recommendations,
                'warnings': result.warnings
            }

            if detailed_analysis:
                # Add detailed analysis
                metrics = result.evaluation_metrics

                # Performance categorization
                performance_category = self._categorize_performance(metrics)
                analysis['performance_category'] = performance_category

                # Identify potential issues
                issues = self._identify_performance_issues(metrics, result.cross_validation_results)
                analysis['potential_issues'] = issues

                # Improvement suggestions
                improvements = self._suggest_improvements(metrics, performance_category, issues)
                analysis['improvement_suggestions'] = improvements

            return analysis

        except Exception as e:
            logger.error(f"Performance analysis failed: {e}")
            return {'error': str(e)}

    def _validate_evaluation_inputs(self,
                                    model_info: Dict[str, Any],
                                    test_data: pd.DataFrame,
                                    target_column: str) -> Dict[str, Any]:
        """Validate evaluation inputs"""
        errors = []

        # Check test data
        if test_data.empty:
            errors.append("Test data is empty")

        # Check target column
        if target_column not in test_data.columns:
            errors.append(f"Target column '{target_column}' not found in test data")

        # Check model info
        if not model_info:
            errors.append("Model information is required")

        return {
            'valid': len(errors) == 0,
            'errors': errors
        }

    def _calculate_basic_metrics(self,
                                 processor,
                                 model_instance,
                                 X_test: pd.DataFrame,
                                 y_test: pd.Series,
                                 problem_type: str) -> Dict[str, Any]:
        """Calculate basic evaluation metrics"""
        try:
            # Try to get predictions from the processor first
            if hasattr(processor, 'models') and model_instance:
                # Preprocess test data similar to training
                X_test_processed = processor._basic_preprocessing(X_test) if hasattr(processor, '_basic_preprocessing') else X_test

                # Make predictions
                y_pred = model_instance.predict(X_test_processed)

                # Calculate metrics based on problem type
                if problem_type == 'classification':
                    metrics = {
                        'accuracy': float(accuracy_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
                        'precision': float(precision_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
                        'recall': float(recall_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
                        'f1_score': float(f1_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0
                    }

                    # Add ROC AUC for binary classification
                    if len(np.unique(y_test)) == 2 and SKLEARN_AVAILABLE:
                        try:
                            if hasattr(model_instance, 'predict_proba'):
                                y_proba = model_instance.predict_proba(X_test_processed)[:, 1]
                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_proba))
                            else:
                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_pred))
                        except Exception:
                            pass  # Skip if not applicable

                    # Add classification report
                    if SKLEARN_AVAILABLE:
                        try:
                            metrics['classification_report'] = classification_report(y_test, y_pred, output_dict=True)
                        except Exception:
                            pass

                elif problem_type in ['regression', 'time_series']:
                    metrics = {
                        'r2_score': float(r2_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
                        'mean_squared_error': float(mean_squared_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
                        'mean_absolute_error': float(mean_absolute_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
                        'root_mean_squared_error': float(np.sqrt(mean_squared_error(y_test, y_pred))) if SKLEARN_AVAILABLE else 0.0
                    }

                    # Add percentage error metrics
                    if len(y_test) > 0:
                        mape = np.mean(np.abs((y_test - y_pred) / np.where(y_test != 0, y_test, 1))) * 100
                        metrics['mean_absolute_percentage_error'] = float(mape)

                else:
                    metrics = {
                        'error': f'Unsupported problem type: {problem_type}'
                    }

                return {
                    'success': True,
                    'metrics': metrics
                }
            else:
                return {
                    'success': False,
                    'errors': ['Model instance not available for evaluation']
                }

        except Exception as e:
            return {
                'success': False,
                'errors': [f'Metric calculation failed: {str(e)}']
            }

    def _perform_cross_validation(self,
                                  model_info: Dict[str, Any],
                                  data: pd.DataFrame,
                                  target_column: str,
                                  config: Dict[str, Any]) -> Dict[str, Any]:
        """Perform cross-validation analysis"""
        try:
            if not SKLEARN_AVAILABLE:
                return {'error': 'scikit-learn not available for cross-validation'}

            processor = model_info.get('processor')
            model_instance = model_info.get('model_instance')
            problem_type = model_info.get('problem_type', 'classification')

            if not (processor and model_instance):
                return {'error': 'Model or processor not available'}

            # Prepare data
            X = data.drop(columns=[target_column])
            y = data[target_column]
            X_processed = processor._basic_preprocessing(X) if hasattr(processor, '_basic_preprocessing') else X

            cv_folds = config.get('cv_folds', 5)

            # Determine scoring metric
            if problem_type == 'classification':
                scoring = 'accuracy' if y.nunique() > 2 else 'roc_auc'
            else:
                scoring = 'r2'

            # Perform cross-validation
            cv_scores = cross_val_score(model_instance, X_processed, y, cv=cv_folds, scoring=scoring)

            cv_results = {
                'scoring_metric': scoring,
                'cv_folds': cv_folds,
                'mean_score': float(cv_scores.mean()),
                'std_score': float(cv_scores.std()),
                'individual_scores': cv_scores.tolist(),
                'score_range': [float(cv_scores.min()), float(cv_scores.max())],
                'confidence_interval_95': [
                    float(cv_scores.mean() - 1.96 * cv_scores.std()),
                    float(cv_scores.mean() + 1.96 * cv_scores.std())
                ]
            }

            return cv_results

        except Exception as e:
            logger.error(f"Cross-validation failed: {e}")
            return {'error': str(e)}

    def _analyze_validation_curves(self,
                                   model_info: Dict[str, Any],
                                   data: pd.DataFrame,
                                   target_column: str,
                                   config: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze validation curves for hyperparameter sensitivity"""
        try:
            if not SKLEARN_AVAILABLE:
                return {'error': 'scikit-learn not available for validation curves'}

            # This would be implemented with validation_curve from sklearn
            # For now, return placeholder
            return {
                'validation_curve_analysis': 'Not implemented in current version',
                'hyperparameter_sensitivity': {},
                'overfitting_analysis': {}
            }

        except Exception as e:
            logger.error(f"Validation curve analysis failed: {e}")
            return {'error': str(e)}

    def _diagnose_model_performance(self,
                                    metrics: Dict[str, Any],
                                    problem_type: str,
                                    model_info: Dict[str, Any]) -> Dict[str, Any]:
        """Diagnose model performance and provide recommendations"""
        recommendations = []
        warnings = []

        if problem_type == 'classification':
            accuracy = metrics.get('accuracy', 0)
            precision = metrics.get('precision', 0)
            recall = metrics.get('recall', 0)
            f1 = metrics.get('f1_score', 0)

            # Performance thresholds
            if accuracy < 0.6:
                warnings.append("Low accuracy detected")
                recommendations.append("Consider feature engineering or different algorithm")

            if precision < 0.5:
                warnings.append("Low precision - many false positives")
                recommendations.append("Adjust classification threshold or use precision-focused metrics")

            if recall < 0.5:
                warnings.append("Low recall - many false negatives")
                recommendations.append("Consider class balancing techniques or recall-focused optimization")

            if abs(precision - recall) > 0.2:
                warnings.append("Significant precision-recall imbalance")
                recommendations.append("Review class distribution and sampling strategy")

        elif problem_type in ['regression', 'time_series']:
            r2 = metrics.get('r2_score', 0)
            rmse = metrics.get('root_mean_squared_error', float('inf'))
            mae = metrics.get('mean_absolute_error', float('inf'))

            if r2 < 0.5:
                warnings.append("Low R² score - poor variance explanation")
                recommendations.append("Consider feature engineering or more complex models")

            if r2 < 0:
                warnings.append("Negative R² - model performs worse than baseline")
                recommendations.append("Review model and data preprocessing")

            mape = metrics.get('mean_absolute_percentage_error')
            if mape and mape > 20:
                warnings.append("High percentage error")
                recommendations.append("Consider data transformation or outlier handling")

        # General recommendations
        if not recommendations:
            recommendations.append("Model performance looks good overall")

        return {
            'recommendations': recommendations,
            'warnings': warnings
        }

    def _rank_models_by_performance(self,
                                    model_performances: Dict[str, Any],
                                    metrics: List[str]) -> List[Dict[str, Any]]:
        """Rank models by performance metrics"""
        rankings = []

        for model_id, performance in model_performances.items():
            score = 0
            metric_count = 0

            model_metrics = performance['metrics']
            problem_type = performance['problem_type']

            # Calculate composite score
            if problem_type == 'classification':
                if 'accuracy' in model_metrics and 'accuracy' in metrics:
                    score += model_metrics['accuracy']
                    metric_count += 1
                if 'f1_score' in model_metrics and 'f1_score' in metrics:
                    score += model_metrics['f1_score']
                    metric_count += 1

            elif problem_type in ['regression', 'time_series']:
                if 'r2_score' in model_metrics and 'r2_score' in metrics:
                    score += max(0, model_metrics['r2_score'])  # Ensure positive
                    metric_count += 1

            average_score = score / max(metric_count, 1)

            rankings.append({
                'model_id': model_id,
                'algorithm': performance['algorithm'],
                'composite_score': average_score,
                'key_metrics': {k: v for k, v in model_metrics.items() if k in metrics}
            })

        # Sort by composite score (descending)
        rankings.sort(key=lambda x: x['composite_score'], reverse=True)

        return rankings

    def _generate_comparison_summary(self,
                                     model_performances: Dict[str, Any],
                                     metrics: List[str]) -> Dict[str, Any]:
        """Generate summary of model comparison"""
        summary = {
            'total_models': len(model_performances),
            'metric_summary': {},
            'performance_distribution': {}
        }

        # Calculate metric statistics across models
        for metric in metrics:
            metric_values = []
            for performance in model_performances.values():
                if metric in performance['metrics']:
                    metric_values.append(performance['metrics'][metric])

            if metric_values:
                summary['metric_summary'][metric] = {
                    'mean': float(np.mean(metric_values)),
                    'std': float(np.std(metric_values)),
                    'min': float(np.min(metric_values)),
                    'max': float(np.max(metric_values))
                }

        return summary

    def _categorize_performance(self, metrics: Dict[str, Any]) -> str:
        """Categorize model performance as excellent, good, fair, or poor"""
        # Implementation would depend on specific thresholds
        # For now, return placeholder
        return "good"

    def _identify_performance_issues(self,
                                     metrics: Dict[str, Any],
                                     cv_results: Dict[str, Any]) -> List[str]:
        """Identify potential performance issues"""
        issues = []

        # Check for overfitting signs
        if cv_results:
            cv_std = cv_results.get('std_score', 0)
            if cv_std > 0.1:
                issues.append("High variance in cross-validation scores - possible overfitting")

        return issues

    def _suggest_improvements(self,
                              metrics: Dict[str, Any],
                              performance_category: str,
                              issues: List[str]) -> List[str]:
        """Suggest specific improvements"""
        suggestions = []

        if performance_category in ['fair', 'poor']:
            suggestions.append("Consider hyperparameter tuning")
            suggestions.append("Try feature engineering")
            suggestions.append("Experiment with different algorithms")

        if 'overfitting' in ' '.join(issues).lower():
            suggestions.append("Add regularization")
            suggestions.append("Reduce model complexity")
            suggestions.append("Increase training data")

        return suggestions

    def _finalize_evaluation_result(self,
                                    result: EvaluationResult,
                                    start_time: datetime) -> EvaluationResult:
        """Finalize evaluation result with timing and stats"""
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()

        # Update performance metrics
        result.performance_metrics['evaluation_duration_seconds'] = duration
        result.performance_metrics['end_time'] = end_time

        # Update execution stats
        self.execution_stats['total_evaluation_operations'] += 1
        if result.success:
            self.execution_stats['successful_evaluation_operations'] += 1
            self.execution_stats['models_evaluated'] += 1
        else:
            self.execution_stats['failed_evaluation_operations'] += 1

        # Update average duration
        total = self.execution_stats['total_evaluation_operations']
        old_avg = self.execution_stats['average_evaluation_time']
        self.execution_stats['average_evaluation_time'] = (old_avg * (total - 1) + duration) / total

        logger.info(f"Evaluation completed: success={result.success}, duration={duration:.2f}s")
        return result

    def get_evaluation_result(self, model_id: str) -> Optional[EvaluationResult]:
        """Get evaluation result for a specific model"""
        return self.evaluation_results.get(model_id)

    def get_execution_stats(self) -> Dict[str, Any]:
        """Get service execution statistics"""
        return {
            **self.execution_stats,
            'success_rate': (
                self.execution_stats['successful_evaluation_operations'] /
                max(1, self.execution_stats['total_evaluation_operations'])
            )
        }