isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/legacy_services/model_evaluation.py (new file)
@@ -0,0 +1,637 @@
+"""
+Model Evaluation Service - Step 2 of Model Pipeline
+Handles model evaluation, validation, and performance assessment
+"""
+
+import pandas as pd
+import numpy as np
+from typing import Dict, List, Any, Optional
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime
+
+try:
+    from sklearn.model_selection import cross_val_score, validation_curve, learning_curve
+    from sklearn.metrics import classification_report, confusion_matrix
+    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
+    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+    SKLEARN_AVAILABLE = True
+except ImportError:
+    SKLEARN_AVAILABLE = False
+    logging.warning("scikit-learn not available. Evaluation capabilities will be limited.")
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class EvaluationResult:
+    """Result of model evaluation step"""
+    success: bool
+    model_id: str
+    evaluation_metrics: Dict[str, Any] = field(default_factory=dict)
+    cross_validation_results: Dict[str, Any] = field(default_factory=dict)
+    validation_analysis: Dict[str, Any] = field(default_factory=dict)
+    performance_comparison: Dict[str, Any] = field(default_factory=dict)
+    recommendations: List[str] = field(default_factory=list)
+    performance_metrics: Dict[str, Any] = field(default_factory=dict)
+    warnings: List[str] = field(default_factory=list)
+    errors: List[str] = field(default_factory=list)
+
+class ModelEvaluationService:
+    """
+    Model Evaluation Service - Step 2 of Model Pipeline
+
+    Handles:
+    - Model performance evaluation using various metrics
+    - Cross-validation and validation curve analysis
+    - Model comparison and benchmarking
+    - Performance diagnostics and recommendations
+    """
+
+    def __init__(self):
+        self.execution_stats = {
+            'total_evaluation_operations': 0,
+            'successful_evaluation_operations': 0,
+            'failed_evaluation_operations': 0,
+            'models_evaluated': 0,
+            'average_evaluation_time': 0.0
+        }
+
+        # Track evaluation results
+        self.evaluation_results = {}
+
+        logger.info("Model Evaluation Service initialized")
+
+    def evaluate_model(self,
+                       model_info: Dict[str, Any],
+                       test_data: pd.DataFrame,
+                       target_column: str,
+                       evaluation_config: Optional[Dict[str, Any]] = None) -> EvaluationResult:
+        """
+        Evaluate a trained model's performance
+
+        Args:
+            model_info: Information about the trained model
+            test_data: Test dataset for evaluation
+            target_column: Target variable column name
+            evaluation_config: Configuration for evaluation
+
+        Returns:
+            EvaluationResult with comprehensive evaluation metrics
+        """
+        start_time = datetime.now()
+        evaluation_config = evaluation_config or {}
+
+        try:
+            model_id = model_info.get('model_id', 'unknown')
+            logger.info(f"Starting model evaluation for: {model_id}")
+
+            # Initialize result
+            result = EvaluationResult(
+                success=False,
+                model_id=model_id
+            )
+
+            # Validate inputs
+            validation_result = self._validate_evaluation_inputs(model_info, test_data, target_column)
+            if not validation_result['valid']:
+                result.errors.extend(validation_result['errors'])
+                return self._finalize_evaluation_result(result, start_time)
+
+            # Extract model and processor
+            processor = model_info.get('processor')
+            model_instance = model_info.get('model_instance')
+            problem_type = model_info.get('problem_type', 'classification')
+
+            if not processor:
+                result.errors.append("Model processor not available")
+                return self._finalize_evaluation_result(result, start_time)
+
+            # Prepare test data
+            X_test = test_data.drop(columns=[target_column])
+            y_test = test_data[target_column]
+
+            # Basic evaluation metrics
+            basic_metrics = self._calculate_basic_metrics(
+                processor, model_instance, X_test, y_test, problem_type
+            )
+
+            if not basic_metrics['success']:
+                result.errors.extend(basic_metrics['errors'])
+                return self._finalize_evaluation_result(result, start_time)
+
+            result.evaluation_metrics = basic_metrics['metrics']
+
+            # Cross-validation analysis
+            if evaluation_config.get('perform_cv', True) and SKLEARN_AVAILABLE:
+                cv_results = self._perform_cross_validation(
+                    model_info, test_data, target_column, evaluation_config
+                )
+                result.cross_validation_results = cv_results
+
+            # Validation curve analysis
+            if evaluation_config.get('validation_curves', False) and SKLEARN_AVAILABLE:
+                validation_analysis = self._analyze_validation_curves(
+                    model_info, test_data, target_column, evaluation_config
+                )
+                result.validation_analysis = validation_analysis
+
+            # Performance diagnostics
+            diagnostics = self._diagnose_model_performance(
+                result.evaluation_metrics, problem_type, model_info
+            )
+            result.recommendations = diagnostics['recommendations']
+            result.warnings.extend(diagnostics['warnings'])
+
+            # Success
+            result.success = True
+            self.evaluation_results[model_id] = result
+
+            return self._finalize_evaluation_result(result, start_time)
+
+        except Exception as e:
+            logger.error(f"Model evaluation failed: {e}")
+            result.errors.append(f"Evaluation error: {str(e)}")
+            return self._finalize_evaluation_result(result, start_time)
+
+    def compare_models(self,
+                       model_infos: List[Dict[str, Any]],
+                       test_data: pd.DataFrame,
+                       target_column: str,
+                       comparison_metrics: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Compare multiple models on the same test dataset"""
+        try:
+            comparison_metrics = comparison_metrics or ['accuracy', 'f1_score', 'r2_score']
+            comparison_results = {
+                'model_comparison': {},
+                'ranking': {},
+                'best_model': None,
+                'comparison_summary': {}
+            }
+
+            model_performances = {}
+
+            for model_info in model_infos:
+                model_id = model_info.get('model_id', 'unknown')
+
+                try:
+                    evaluation_result = self.evaluate_model(
+                        model_info, test_data, target_column, {'perform_cv': False}
+                    )
+
+                    if evaluation_result.success:
+                        model_performances[model_id] = {
+                            'metrics': evaluation_result.evaluation_metrics,
+                            'algorithm': model_info.get('training_config', {}).algorithm,
+                            'problem_type': model_info.get('problem_type')
+                        }
+                        comparison_results['model_comparison'][model_id] = evaluation_result.evaluation_metrics
+                    else:
+                        logger.warning(f"Evaluation failed for model {model_id}")
+
+                except Exception as e:
+                    logger.error(f"Error evaluating model {model_id}: {e}")
+
+            # Rank models by performance
+            if model_performances:
+                rankings = self._rank_models_by_performance(model_performances, comparison_metrics)
+                comparison_results['ranking'] = rankings
+
+                if rankings:
+                    best_model_id = rankings[0]['model_id']
+                    comparison_results['best_model'] = {
+                        'model_id': best_model_id,
+                        'metrics': model_performances[best_model_id]['metrics'],
+                        'algorithm': model_performances[best_model_id]['algorithm']
+                    }
+
+                # Generate comparison summary
+                comparison_results['comparison_summary'] = self._generate_comparison_summary(
+                    model_performances, comparison_metrics
+                )
+
+            return comparison_results
+
+        except Exception as e:
+            logger.error(f"Model comparison failed: {e}")
+            return {'error': str(e)}
+
+    def analyze_model_performance(self,
+                                  model_id: str,
+                                  detailed_analysis: bool = True) -> Dict[str, Any]:
+        """Perform detailed performance analysis for a specific model"""
+        try:
+            if model_id not in self.evaluation_results:
+                return {'error': f'No evaluation results found for model {model_id}'}
+
+            result = self.evaluation_results[model_id]
+
+            analysis = {
+                'model_id': model_id,
+                'basic_performance': result.evaluation_metrics,
+                'cross_validation': result.cross_validation_results,
+                'recommendations': result.recommendations,
+                'warnings': result.warnings
+            }
+
+            if detailed_analysis:
+                # Add detailed analysis
+                metrics = result.evaluation_metrics
+
+                # Performance categorization
+                performance_category = self._categorize_performance(metrics)
+                analysis['performance_category'] = performance_category
+
+                # Identify potential issues
+                issues = self._identify_performance_issues(metrics, result.cross_validation_results)
+                analysis['potential_issues'] = issues
+
+                # Improvement suggestions
+                improvements = self._suggest_improvements(metrics, performance_category, issues)
+                analysis['improvement_suggestions'] = improvements
+
+            return analysis
+
+        except Exception as e:
+            logger.error(f"Performance analysis failed: {e}")
+            return {'error': str(e)}
+
+    def _validate_evaluation_inputs(self,
+                                    model_info: Dict[str, Any],
+                                    test_data: pd.DataFrame,
+                                    target_column: str) -> Dict[str, Any]:
+        """Validate evaluation inputs"""
+        errors = []
+
+        # Check test data
+        if test_data.empty:
+            errors.append("Test data is empty")
+
+        # Check target column
+        if target_column not in test_data.columns:
+            errors.append(f"Target column '{target_column}' not found in test data")
+
+        # Check model info
+        if not model_info:
+            errors.append("Model information is required")
+
+        return {
+            'valid': len(errors) == 0,
+            'errors': errors
+        }
+
+    def _calculate_basic_metrics(self,
+                                 processor,
+                                 model_instance,
+                                 X_test: pd.DataFrame,
+                                 y_test: pd.Series,
+                                 problem_type: str) -> Dict[str, Any]:
+        """Calculate basic evaluation metrics"""
+        try:
+            # Try to get predictions from the processor first
+            if hasattr(processor, 'models') and model_instance:
+                # Preprocess test data similar to training
+                X_test_processed = processor._basic_preprocessing(X_test) if hasattr(processor, '_basic_preprocessing') else X_test
+
+                # Make predictions
+                y_pred = model_instance.predict(X_test_processed)
+
+                # Calculate metrics based on problem type
+                if problem_type == 'classification':
+                    metrics = {
+                        'accuracy': float(accuracy_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'precision': float(precision_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
+                        'recall': float(recall_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
+                        'f1_score': float(f1_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0
+                    }
+
+                    # Add ROC AUC for binary classification
+                    if len(np.unique(y_test)) == 2 and SKLEARN_AVAILABLE:
+                        try:
+                            if hasattr(model_instance, 'predict_proba'):
+                                y_proba = model_instance.predict_proba(X_test_processed)[:, 1]
+                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_proba))
+                            else:
+                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_pred))
+                        except Exception:
+                            pass  # Skip if not applicable
+
+                    # Add classification report
+                    if SKLEARN_AVAILABLE:
+                        try:
+                            metrics['classification_report'] = classification_report(y_test, y_pred, output_dict=True)
+                        except Exception:
+                            pass
+
+                elif problem_type in ['regression', 'time_series']:
+                    metrics = {
+                        'r2_score': float(r2_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'mean_squared_error': float(mean_squared_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'mean_absolute_error': float(mean_absolute_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'root_mean_squared_error': float(np.sqrt(mean_squared_error(y_test, y_pred))) if SKLEARN_AVAILABLE else 0.0
+                    }
+
+                    # Add percentage error metrics
+                    if len(y_test) > 0:
+                        mape = np.mean(np.abs((y_test - y_pred) / np.where(y_test != 0, y_test, 1))) * 100
+                        metrics['mean_absolute_percentage_error'] = float(mape)
+
+                else:
+                    metrics = {
+                        'error': f'Unsupported problem type: {problem_type}'
+                    }
+
+                return {
+                    'success': True,
+                    'metrics': metrics
+                }
+            else:
+                return {
+                    'success': False,
+                    'errors': ['Model instance not available for evaluation']
+                }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'errors': [f'Metric calculation failed: {str(e)}']
+            }
+
+    def _perform_cross_validation(self,
+                                  model_info: Dict[str, Any],
+                                  data: pd.DataFrame,
+                                  target_column: str,
+                                  config: Dict[str, Any]) -> Dict[str, Any]:
+        """Perform cross-validation analysis"""
+        try:
+            if not SKLEARN_AVAILABLE:
+                return {'error': 'scikit-learn not available for cross-validation'}
+
+            processor = model_info.get('processor')
+            model_instance = model_info.get('model_instance')
+            problem_type = model_info.get('problem_type', 'classification')
+
+            if not (processor and model_instance):
+                return {'error': 'Model or processor not available'}
+
+            # Prepare data
+            X = data.drop(columns=[target_column])
+            y = data[target_column]
+            X_processed = processor._basic_preprocessing(X) if hasattr(processor, '_basic_preprocessing') else X
+
+            cv_folds = config.get('cv_folds', 5)
+
+            # Determine scoring metric
+            if problem_type == 'classification':
+                scoring = 'accuracy' if y.nunique() > 2 else 'roc_auc'
+            else:
+                scoring = 'r2'
+
+            # Perform cross-validation
+            cv_scores = cross_val_score(model_instance, X_processed, y, cv=cv_folds, scoring=scoring)
+
+            cv_results = {
+                'scoring_metric': scoring,
+                'cv_folds': cv_folds,
+                'mean_score': float(cv_scores.mean()),
+                'std_score': float(cv_scores.std()),
+                'individual_scores': cv_scores.tolist(),
+                'score_range': [float(cv_scores.min()), float(cv_scores.max())],
+                'confidence_interval_95': [
+                    float(cv_scores.mean() - 1.96 * cv_scores.std()),
+                    float(cv_scores.mean() + 1.96 * cv_scores.std())
+                ]
+            }
+
+            return cv_results
+
+        except Exception as e:
+            logger.error(f"Cross-validation failed: {e}")
+            return {'error': str(e)}
+
+    def _analyze_validation_curves(self,
+                                   model_info: Dict[str, Any],
+                                   data: pd.DataFrame,
+                                   target_column: str,
+                                   config: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze validation curves for hyperparameter sensitivity"""
+        try:
+            if not SKLEARN_AVAILABLE:
+                return {'error': 'scikit-learn not available for validation curves'}
+
+            # This would be implemented with validation_curve from sklearn
+            # For now, return placeholder
+            return {
+                'validation_curve_analysis': 'Not implemented in current version',
+                'hyperparameter_sensitivity': {},
+                'overfitting_analysis': {}
+            }
+
+        except Exception as e:
+            logger.error(f"Validation curve analysis failed: {e}")
+            return {'error': str(e)}
+
+    def _diagnose_model_performance(self,
+                                    metrics: Dict[str, Any],
+                                    problem_type: str,
+                                    model_info: Dict[str, Any]) -> Dict[str, Any]:
+        """Diagnose model performance and provide recommendations"""
+        recommendations = []
+        warnings = []
+
+        if problem_type == 'classification':
+            accuracy = metrics.get('accuracy', 0)
+            precision = metrics.get('precision', 0)
+            recall = metrics.get('recall', 0)
+            f1 = metrics.get('f1_score', 0)
+
+            # Performance thresholds
+            if accuracy < 0.6:
+                warnings.append("Low accuracy detected")
+                recommendations.append("Consider feature engineering or different algorithm")
+
+            if precision < 0.5:
+                warnings.append("Low precision - many false positives")
+                recommendations.append("Adjust classification threshold or use precision-focused metrics")
+
+            if recall < 0.5:
+                warnings.append("Low recall - many false negatives")
+                recommendations.append("Consider class balancing techniques or recall-focused optimization")
+
+            if abs(precision - recall) > 0.2:
+                warnings.append("Significant precision-recall imbalance")
+                recommendations.append("Review class distribution and sampling strategy")
+
+        elif problem_type in ['regression', 'time_series']:
+            r2 = metrics.get('r2_score', 0)
+            rmse = metrics.get('root_mean_squared_error', float('inf'))
+            mae = metrics.get('mean_absolute_error', float('inf'))
+
+            if r2 < 0.5:
+                warnings.append("Low R² score - poor variance explanation")
+                recommendations.append("Consider feature engineering or more complex models")
+
+            if r2 < 0:
+                warnings.append("Negative R² - model performs worse than baseline")
+                recommendations.append("Review model and data preprocessing")
+
+            mape = metrics.get('mean_absolute_percentage_error')
+            if mape and mape > 20:
+                warnings.append("High percentage error")
+                recommendations.append("Consider data transformation or outlier handling")
+
+        # General recommendations
+        if not recommendations:
+            recommendations.append("Model performance looks good overall")
+
+        return {
+            'recommendations': recommendations,
+            'warnings': warnings
+        }
+
+    def _rank_models_by_performance(self,
+                                    model_performances: Dict[str, Any],
+                                    metrics: List[str]) -> List[Dict[str, Any]]:
+        """Rank models by performance metrics"""
+        rankings = []
+
+        for model_id, performance in model_performances.items():
+            score = 0
+            metric_count = 0
+
+            model_metrics = performance['metrics']
+            problem_type = performance['problem_type']
+
+            # Calculate composite score
+            if problem_type == 'classification':
+                if 'accuracy' in model_metrics and 'accuracy' in metrics:
+                    score += model_metrics['accuracy']
+                    metric_count += 1
+                if 'f1_score' in model_metrics and 'f1_score' in metrics:
+                    score += model_metrics['f1_score']
+                    metric_count += 1
+
+            elif problem_type in ['regression', 'time_series']:
+                if 'r2_score' in model_metrics and 'r2_score' in metrics:
+                    score += max(0, model_metrics['r2_score'])  # Ensure positive
+                    metric_count += 1
+
+            average_score = score / max(metric_count, 1)
+
+            rankings.append({
+                'model_id': model_id,
+                'algorithm': performance['algorithm'],
+                'composite_score': average_score,
+                'key_metrics': {k: v for k, v in model_metrics.items() if k in metrics}
+            })
+
+        # Sort by composite score (descending)
+        rankings.sort(key=lambda x: x['composite_score'], reverse=True)
+
+        return rankings
+
+    def _generate_comparison_summary(self,
+                                     model_performances: Dict[str, Any],
+                                     metrics: List[str]) -> Dict[str, Any]:
+        """Generate summary of model comparison"""
+        summary = {
+            'total_models': len(model_performances),
+            'metric_summary': {},
+            'performance_distribution': {}
+        }
+
+        # Calculate metric statistics across models
+        for metric in metrics:
+            metric_values = []
+            for performance in model_performances.values():
+                if metric in performance['metrics']:
+                    metric_values.append(performance['metrics'][metric])
+
+            if metric_values:
+                summary['metric_summary'][metric] = {
+                    'mean': float(np.mean(metric_values)),
+                    'std': float(np.std(metric_values)),
+                    'min': float(np.min(metric_values)),
+                    'max': float(np.max(metric_values))
+                }
+
+        return summary
+
+    def _categorize_performance(self, metrics: Dict[str, Any]) -> str:
+        """Categorize model performance as excellent, good, fair, or poor"""
+        # Implementation would depend on specific thresholds
+        # For now, return placeholder
+        return "good"
+
+    def _identify_performance_issues(self,
+                                     metrics: Dict[str, Any],
+                                     cv_results: Dict[str, Any]) -> List[str]:
+        """Identify potential performance issues"""
+        issues = []
+
+        # Check for overfitting signs
+        if cv_results:
+            cv_std = cv_results.get('std_score', 0)
+            if cv_std > 0.1:
+                issues.append("High variance in cross-validation scores - possible overfitting")
+
+        return issues
+
+    def _suggest_improvements(self,
+                              metrics: Dict[str, Any],
+                              performance_category: str,
+                              issues: List[str]) -> List[str]:
+        """Suggest specific improvements"""
+        suggestions = []
+
+        if performance_category in ['fair', 'poor']:
+            suggestions.append("Consider hyperparameter tuning")
+            suggestions.append("Try feature engineering")
+            suggestions.append("Experiment with different algorithms")
+
+        if 'overfitting' in ' '.join(issues).lower():
+            suggestions.append("Add regularization")
+            suggestions.append("Reduce model complexity")
+            suggestions.append("Increase training data")
+
+        return suggestions
+
+    def _finalize_evaluation_result(self,
+                                    result: EvaluationResult,
+                                    start_time: datetime) -> EvaluationResult:
+        """Finalize evaluation result with timing and stats"""
+        end_time = datetime.now()
+        duration = (end_time - start_time).total_seconds()
+
+        # Update performance metrics
+        result.performance_metrics['evaluation_duration_seconds'] = duration
+        result.performance_metrics['end_time'] = end_time
+
+        # Update execution stats
+        self.execution_stats['total_evaluation_operations'] += 1
+        if result.success:
+            self.execution_stats['successful_evaluation_operations'] += 1
+            self.execution_stats['models_evaluated'] += 1
+        else:
+            self.execution_stats['failed_evaluation_operations'] += 1
+
+        # Update average duration
+        total = self.execution_stats['total_evaluation_operations']
+        old_avg = self.execution_stats['average_evaluation_time']
+        self.execution_stats['average_evaluation_time'] = (old_avg * (total - 1) + duration) / total
+
+        logger.info(f"Evaluation completed: success={result.success}, duration={duration:.2f}s")
+        return result
+
+    def get_evaluation_result(self, model_id: str) -> Optional[EvaluationResult]:
+        """Get evaluation result for a specific model"""
+        return self.evaluation_results.get(model_id)
+
+    def get_execution_stats(self) -> Dict[str, Any]:
+        """Get service execution statistics"""
+        return {
+            **self.execution_stats,
+            'success_rate': (
+                self.execution_stats['successful_evaluation_operations'] /
+                max(1, self.execution_stats['total_evaluation_operations'])
+            )
+        }