isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/legacy_services/model_evaluation.py
@@ -0,0 +1,637 @@
+"""
+Model Evaluation Service - Step 2 of Model Pipeline
+Handles model evaluation, validation, and performance assessment
+"""
+
+import pandas as pd
+import numpy as np
+from typing import Dict, List, Any, Optional
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime
+
+try:
+    from sklearn.model_selection import cross_val_score, validation_curve, learning_curve
+    from sklearn.metrics import classification_report, confusion_matrix
+    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
+    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+    SKLEARN_AVAILABLE = True
+except ImportError:
+    SKLEARN_AVAILABLE = False
+    logging.warning("scikit-learn not available. Evaluation capabilities will be limited.")
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class EvaluationResult:
+    """Result of model evaluation step"""
+    success: bool
+    model_id: str
+    evaluation_metrics: Dict[str, Any] = field(default_factory=dict)
+    cross_validation_results: Dict[str, Any] = field(default_factory=dict)
+    validation_analysis: Dict[str, Any] = field(default_factory=dict)
+    performance_comparison: Dict[str, Any] = field(default_factory=dict)
+    recommendations: List[str] = field(default_factory=list)
+    performance_metrics: Dict[str, Any] = field(default_factory=dict)
+    warnings: List[str] = field(default_factory=list)
+    errors: List[str] = field(default_factory=list)
+
+class ModelEvaluationService:
+    """
+    Model Evaluation Service - Step 2 of Model Pipeline
+
+    Handles:
+    - Model performance evaluation using various metrics
+    - Cross-validation and validation curve analysis
+    - Model comparison and benchmarking
+    - Performance diagnostics and recommendations
+    """
+
+    def __init__(self):
+        self.execution_stats = {
+            'total_evaluation_operations': 0,
+            'successful_evaluation_operations': 0,
+            'failed_evaluation_operations': 0,
+            'models_evaluated': 0,
+            'average_evaluation_time': 0.0
+        }
+
+        # Track evaluation results
+        self.evaluation_results = {}
+
+        logger.info("Model Evaluation Service initialized")
+
+    def evaluate_model(self,
+                       model_info: Dict[str, Any],
+                       test_data: pd.DataFrame,
+                       target_column: str,
+                       evaluation_config: Optional[Dict[str, Any]] = None) -> EvaluationResult:
+        """
+        Evaluate a trained model's performance
+
+        Args:
+            model_info: Information about the trained model
+            test_data: Test dataset for evaluation
+            target_column: Target variable column name
+            evaluation_config: Configuration for evaluation
+
+        Returns:
+            EvaluationResult with comprehensive evaluation metrics
+        """
+        start_time = datetime.now()
+        evaluation_config = evaluation_config or {}
+
+        # Initialize result before the try block so the exception handler can always reference it
+        model_id = model_info.get('model_id', 'unknown') if model_info else 'unknown'
+        result = EvaluationResult(
+            success=False,
+            model_id=model_id
+        )
+
+        try:
+            logger.info(f"Starting model evaluation for: {model_id}")
+
+            # Validate inputs
+            validation_result = self._validate_evaluation_inputs(model_info, test_data, target_column)
+            if not validation_result['valid']:
+                result.errors.extend(validation_result['errors'])
+                return self._finalize_evaluation_result(result, start_time)
+
+            # Extract model and processor
+            processor = model_info.get('processor')
+            model_instance = model_info.get('model_instance')
+            problem_type = model_info.get('problem_type', 'classification')
+
+            if not processor:
+                result.errors.append("Model processor not available")
+                return self._finalize_evaluation_result(result, start_time)
+
+            # Prepare test data
+            X_test = test_data.drop(columns=[target_column])
+            y_test = test_data[target_column]
+
+            # Basic evaluation metrics
+            basic_metrics = self._calculate_basic_metrics(
+                processor, model_instance, X_test, y_test, problem_type
+            )
+
+            if not basic_metrics['success']:
+                result.errors.extend(basic_metrics['errors'])
+                return self._finalize_evaluation_result(result, start_time)
+
+            result.evaluation_metrics = basic_metrics['metrics']
+
+            # Cross-validation analysis
+            if evaluation_config.get('perform_cv', True) and SKLEARN_AVAILABLE:
+                cv_results = self._perform_cross_validation(
+                    model_info, test_data, target_column, evaluation_config
+                )
+                result.cross_validation_results = cv_results
+
+            # Validation curve analysis
+            if evaluation_config.get('validation_curves', False) and SKLEARN_AVAILABLE:
+                validation_analysis = self._analyze_validation_curves(
+                    model_info, test_data, target_column, evaluation_config
+                )
+                result.validation_analysis = validation_analysis
+
+            # Performance diagnostics
+            diagnostics = self._diagnose_model_performance(
+                result.evaluation_metrics, problem_type, model_info
+            )
+            result.recommendations = diagnostics['recommendations']
+            result.warnings.extend(diagnostics['warnings'])
+
+            # Success
+            result.success = True
+            self.evaluation_results[model_id] = result
+
+            return self._finalize_evaluation_result(result, start_time)
+
+        except Exception as e:
+            logger.error(f"Model evaluation failed: {e}")
+            result.errors.append(f"Evaluation error: {str(e)}")
+            return self._finalize_evaluation_result(result, start_time)
+
+    def compare_models(self,
+                       model_infos: List[Dict[str, Any]],
+                       test_data: pd.DataFrame,
+                       target_column: str,
+                       comparison_metrics: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Compare multiple models on the same test dataset"""
+        try:
+            comparison_metrics = comparison_metrics or ['accuracy', 'f1_score', 'r2_score']
+            comparison_results = {
+                'model_comparison': {},
+                'ranking': {},
+                'best_model': None,
+                'comparison_summary': {}
+            }
+
+            model_performances = {}
+
+            for model_info in model_infos:
+                model_id = model_info.get('model_id', 'unknown')
+
+                try:
+                    evaluation_result = self.evaluate_model(
+                        model_info, test_data, target_column, {'perform_cv': False}
+                    )
+
+                    if evaluation_result.success:
+                        model_performances[model_id] = {
+                            'metrics': evaluation_result.evaluation_metrics,
+                            'algorithm': model_info.get('training_config', {}).get('algorithm'),
+                            'problem_type': model_info.get('problem_type')
+                        }
+                        comparison_results['model_comparison'][model_id] = evaluation_result.evaluation_metrics
+                    else:
+                        logger.warning(f"Evaluation failed for model {model_id}")
+
+                except Exception as e:
+                    logger.error(f"Error evaluating model {model_id}: {e}")
+
+            # Rank models by performance
+            if model_performances:
+                rankings = self._rank_models_by_performance(model_performances, comparison_metrics)
+                comparison_results['ranking'] = rankings
+
+                if rankings:
+                    best_model_id = rankings[0]['model_id']
+                    comparison_results['best_model'] = {
+                        'model_id': best_model_id,
+                        'metrics': model_performances[best_model_id]['metrics'],
+                        'algorithm': model_performances[best_model_id]['algorithm']
+                    }
+
+                # Generate comparison summary
+                comparison_results['comparison_summary'] = self._generate_comparison_summary(
+                    model_performances, comparison_metrics
+                )
+
+            return comparison_results
+
+        except Exception as e:
+            logger.error(f"Model comparison failed: {e}")
+            return {'error': str(e)}
+
+    def analyze_model_performance(self,
+                                  model_id: str,
+                                  detailed_analysis: bool = True) -> Dict[str, Any]:
+        """Perform detailed performance analysis for a specific model"""
+        try:
+            if model_id not in self.evaluation_results:
+                return {'error': f'No evaluation results found for model {model_id}'}
+
+            result = self.evaluation_results[model_id]
+
+            analysis = {
+                'model_id': model_id,
+                'basic_performance': result.evaluation_metrics,
+                'cross_validation': result.cross_validation_results,
+                'recommendations': result.recommendations,
+                'warnings': result.warnings
+            }
+
+            if detailed_analysis:
+                # Add detailed analysis
+                metrics = result.evaluation_metrics
+
+                # Performance categorization
+                performance_category = self._categorize_performance(metrics)
+                analysis['performance_category'] = performance_category
+
+                # Identify potential issues
+                issues = self._identify_performance_issues(metrics, result.cross_validation_results)
+                analysis['potential_issues'] = issues
+
+                # Improvement suggestions
+                improvements = self._suggest_improvements(metrics, performance_category, issues)
+                analysis['improvement_suggestions'] = improvements
+
+            return analysis
+
+        except Exception as e:
+            logger.error(f"Performance analysis failed: {e}")
+            return {'error': str(e)}
+
+    def _validate_evaluation_inputs(self,
+                                    model_info: Dict[str, Any],
+                                    test_data: pd.DataFrame,
+                                    target_column: str) -> Dict[str, Any]:
+        """Validate evaluation inputs"""
+        errors = []
+
+        # Check test data
+        if test_data.empty:
+            errors.append("Test data is empty")
+
+        # Check target column
+        if target_column not in test_data.columns:
+            errors.append(f"Target column '{target_column}' not found in test data")
+
+        # Check model info
+        if not model_info:
+            errors.append("Model information is required")
+
+        return {
+            'valid': len(errors) == 0,
+            'errors': errors
+        }
+
+    def _calculate_basic_metrics(self,
+                                 processor,
+                                 model_instance,
+                                 X_test: pd.DataFrame,
+                                 y_test: pd.Series,
+                                 problem_type: str) -> Dict[str, Any]:
+        """Calculate basic evaluation metrics"""
+        try:
+            # Try to get predictions from the processor first
+            if hasattr(processor, 'models') and model_instance:
+                # Preprocess test data similar to training
+                X_test_processed = processor._basic_preprocessing(X_test) if hasattr(processor, '_basic_preprocessing') else X_test
+
+                # Make predictions
+                y_pred = model_instance.predict(X_test_processed)
+
+                # Calculate metrics based on problem type
+                if problem_type == 'classification':
+                    metrics = {
+                        'accuracy': float(accuracy_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'precision': float(precision_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
+                        'recall': float(recall_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0,
+                        'f1_score': float(f1_score(y_test, y_pred, average='weighted', zero_division=0)) if SKLEARN_AVAILABLE else 0.0
+                    }
+
+                    # Add ROC AUC for binary classification
+                    if len(np.unique(y_test)) == 2 and SKLEARN_AVAILABLE:
+                        try:
+                            if hasattr(model_instance, 'predict_proba'):
+                                y_proba = model_instance.predict_proba(X_test_processed)[:, 1]
+                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_proba))
+                            else:
+                                metrics['roc_auc'] = float(roc_auc_score(y_test, y_pred))
+                        except Exception:
+                            pass  # Skip if not applicable
+
+                    # Add classification report
+                    if SKLEARN_AVAILABLE:
+                        try:
+                            metrics['classification_report'] = classification_report(y_test, y_pred, output_dict=True)
+                        except Exception:
+                            pass
+
+                elif problem_type in ['regression', 'time_series']:
+                    metrics = {
+                        'r2_score': float(r2_score(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'mean_squared_error': float(mean_squared_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'mean_absolute_error': float(mean_absolute_error(y_test, y_pred)) if SKLEARN_AVAILABLE else 0.0,
+                        'root_mean_squared_error': float(np.sqrt(mean_squared_error(y_test, y_pred))) if SKLEARN_AVAILABLE else 0.0
+                    }
+
+                    # Add percentage error metrics
+                    if len(y_test) > 0:
+                        mape = np.mean(np.abs((y_test - y_pred) / np.where(y_test != 0, y_test, 1))) * 100
+                        metrics['mean_absolute_percentage_error'] = float(mape)
+
+                else:
+                    metrics = {
+                        'error': f'Unsupported problem type: {problem_type}'
+                    }
+
+                return {
+                    'success': True,
+                    'metrics': metrics
+                }
+            else:
+                return {
+                    'success': False,
+                    'errors': ['Model instance not available for evaluation']
+                }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'errors': [f'Metric calculation failed: {str(e)}']
+            }
+
+    def _perform_cross_validation(self,
+                                  model_info: Dict[str, Any],
+                                  data: pd.DataFrame,
+                                  target_column: str,
+                                  config: Dict[str, Any]) -> Dict[str, Any]:
+        """Perform cross-validation analysis"""
+        try:
+            if not SKLEARN_AVAILABLE:
+                return {'error': 'scikit-learn not available for cross-validation'}
+
+            processor = model_info.get('processor')
+            model_instance = model_info.get('model_instance')
+            problem_type = model_info.get('problem_type', 'classification')
+
+            if not (processor and model_instance):
+                return {'error': 'Model or processor not available'}
+
+            # Prepare data
+            X = data.drop(columns=[target_column])
+            y = data[target_column]
+            X_processed = processor._basic_preprocessing(X) if hasattr(processor, '_basic_preprocessing') else X
+
+            cv_folds = config.get('cv_folds', 5)
+
+            # Determine scoring metric
+            if problem_type == 'classification':
+                scoring = 'accuracy' if y.nunique() > 2 else 'roc_auc'
+            else:
+                scoring = 'r2'
+
+            # Perform cross-validation
+            cv_scores = cross_val_score(model_instance, X_processed, y, cv=cv_folds, scoring=scoring)
+
+            cv_results = {
+                'scoring_metric': scoring,
+                'cv_folds': cv_folds,
+                'mean_score': float(cv_scores.mean()),
+                'std_score': float(cv_scores.std()),
+                'individual_scores': cv_scores.tolist(),
+                'score_range': [float(cv_scores.min()), float(cv_scores.max())],
+                'confidence_interval_95': [
+                    float(cv_scores.mean() - 1.96 * cv_scores.std()),
+                    float(cv_scores.mean() + 1.96 * cv_scores.std())
+                ]
+            }
+
+            return cv_results
+
+        except Exception as e:
+            logger.error(f"Cross-validation failed: {e}")
+            return {'error': str(e)}
+
+    def _analyze_validation_curves(self,
+                                   model_info: Dict[str, Any],
+                                   data: pd.DataFrame,
+                                   target_column: str,
+                                   config: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze validation curves for hyperparameter sensitivity"""
+        try:
+            if not SKLEARN_AVAILABLE:
+                return {'error': 'scikit-learn not available for validation curves'}
+
+            # This would be implemented with validation_curve from sklearn
+            # For now, return placeholder
+            return {
+                'validation_curve_analysis': 'Not implemented in current version',
+                'hyperparameter_sensitivity': {},
+                'overfitting_analysis': {}
+            }
+
+        except Exception as e:
+            logger.error(f"Validation curve analysis failed: {e}")
+            return {'error': str(e)}
+
+    def _diagnose_model_performance(self,
+                                    metrics: Dict[str, Any],
+                                    problem_type: str,
+                                    model_info: Dict[str, Any]) -> Dict[str, Any]:
+        """Diagnose model performance and provide recommendations"""
+        recommendations = []
+        warnings = []
+
+        if problem_type == 'classification':
+            accuracy = metrics.get('accuracy', 0)
+            precision = metrics.get('precision', 0)
+            recall = metrics.get('recall', 0)
+            f1 = metrics.get('f1_score', 0)
+
+            # Performance thresholds
+            if accuracy < 0.6:
+                warnings.append("Low accuracy detected")
+                recommendations.append("Consider feature engineering or different algorithm")
+
+            if precision < 0.5:
+                warnings.append("Low precision - many false positives")
+                recommendations.append("Adjust classification threshold or use precision-focused metrics")
+
+            if recall < 0.5:
+                warnings.append("Low recall - many false negatives")
+                recommendations.append("Consider class balancing techniques or recall-focused optimization")
+
+            if abs(precision - recall) > 0.2:
+                warnings.append("Significant precision-recall imbalance")
+                recommendations.append("Review class distribution and sampling strategy")
+
+        elif problem_type in ['regression', 'time_series']:
+            r2 = metrics.get('r2_score', 0)
+            rmse = metrics.get('root_mean_squared_error', float('inf'))
+            mae = metrics.get('mean_absolute_error', float('inf'))
+
+            if r2 < 0.5:
+                warnings.append("Low R² score - poor variance explanation")
+                recommendations.append("Consider feature engineering or more complex models")
+
+            if r2 < 0:
+                warnings.append("Negative R² - model performs worse than baseline")
+                recommendations.append("Review model and data preprocessing")
+
+            mape = metrics.get('mean_absolute_percentage_error')
+            if mape and mape > 20:
+                warnings.append("High percentage error")
+                recommendations.append("Consider data transformation or outlier handling")
+
+        # General recommendations
+        if not recommendations:
+            recommendations.append("Model performance looks good overall")
+
+        return {
+            'recommendations': recommendations,
+            'warnings': warnings
+        }
+
+    def _rank_models_by_performance(self,
+                                    model_performances: Dict[str, Any],
+                                    metrics: List[str]) -> List[Dict[str, Any]]:
+        """Rank models by performance metrics"""
+        rankings = []
+
+        for model_id, performance in model_performances.items():
+            score = 0
+            metric_count = 0
+
+            model_metrics = performance['metrics']
+            problem_type = performance['problem_type']
+
+            # Calculate composite score
+            if problem_type == 'classification':
+                if 'accuracy' in model_metrics and 'accuracy' in metrics:
+                    score += model_metrics['accuracy']
+                    metric_count += 1
+                if 'f1_score' in model_metrics and 'f1_score' in metrics:
+                    score += model_metrics['f1_score']
+                    metric_count += 1
+
+            elif problem_type in ['regression', 'time_series']:
+                if 'r2_score' in model_metrics and 'r2_score' in metrics:
+                    score += max(0, model_metrics['r2_score'])  # Ensure positive
+                    metric_count += 1
+
+            average_score = score / max(metric_count, 1)
+
+            rankings.append({
+                'model_id': model_id,
+                'algorithm': performance['algorithm'],
+                'composite_score': average_score,
+                'key_metrics': {k: v for k, v in model_metrics.items() if k in metrics}
+            })
+
+        # Sort by composite score (descending)
+        rankings.sort(key=lambda x: x['composite_score'], reverse=True)
+
+        return rankings
+
+    def _generate_comparison_summary(self,
+                                     model_performances: Dict[str, Any],
+                                     metrics: List[str]) -> Dict[str, Any]:
+        """Generate summary of model comparison"""
+        summary = {
+            'total_models': len(model_performances),
+            'metric_summary': {},
+            'performance_distribution': {}
+        }
+
+        # Calculate metric statistics across models
+        for metric in metrics:
+            metric_values = []
+            for performance in model_performances.values():
+                if metric in performance['metrics']:
+                    metric_values.append(performance['metrics'][metric])
+
+            if metric_values:
+                summary['metric_summary'][metric] = {
+                    'mean': float(np.mean(metric_values)),
+                    'std': float(np.std(metric_values)),
+                    'min': float(np.min(metric_values)),
+                    'max': float(np.max(metric_values))
+                }
+
+        return summary
+
+    def _categorize_performance(self, metrics: Dict[str, Any]) -> str:
+        """Categorize model performance as excellent, good, fair, or poor"""
+        # Implementation would depend on specific thresholds
+        # For now, return placeholder
+        return "good"
+
+    def _identify_performance_issues(self,
+                                     metrics: Dict[str, Any],
+                                     cv_results: Dict[str, Any]) -> List[str]:
+        """Identify potential performance issues"""
+        issues = []
+
+        # Check for overfitting signs
+        if cv_results:
+            cv_std = cv_results.get('std_score', 0)
+            if cv_std > 0.1:
+                issues.append("High variance in cross-validation scores - possible overfitting")
+
+        return issues
+
+    def _suggest_improvements(self,
+                              metrics: Dict[str, Any],
+                              performance_category: str,
+                              issues: List[str]) -> List[str]:
+        """Suggest specific improvements"""
+        suggestions = []
+
+        if performance_category in ['fair', 'poor']:
+            suggestions.append("Consider hyperparameter tuning")
+            suggestions.append("Try feature engineering")
+            suggestions.append("Experiment with different algorithms")
+
+        if 'overfitting' in ' '.join(issues).lower():
+            suggestions.append("Add regularization")
+            suggestions.append("Reduce model complexity")
+            suggestions.append("Increase training data")
+
+        return suggestions
+
+    def _finalize_evaluation_result(self,
+                                    result: EvaluationResult,
+                                    start_time: datetime) -> EvaluationResult:
+        """Finalize evaluation result with timing and stats"""
+        end_time = datetime.now()
+        duration = (end_time - start_time).total_seconds()
+
+        # Update performance metrics
+        result.performance_metrics['evaluation_duration_seconds'] = duration
+        result.performance_metrics['end_time'] = end_time
+
+        # Update execution stats
+        self.execution_stats['total_evaluation_operations'] += 1
+        if result.success:
+            self.execution_stats['successful_evaluation_operations'] += 1
+            self.execution_stats['models_evaluated'] += 1
+        else:
+            self.execution_stats['failed_evaluation_operations'] += 1
+
+        # Update average duration
+        total = self.execution_stats['total_evaluation_operations']
+        old_avg = self.execution_stats['average_evaluation_time']
+        self.execution_stats['average_evaluation_time'] = (old_avg * (total - 1) + duration) / total
+
+        logger.info(f"Evaluation completed: success={result.success}, duration={duration:.2f}s")
+        return result
+
+    def get_evaluation_result(self, model_id: str) -> Optional[EvaluationResult]:
+        """Get evaluation result for a specific model"""
+        return self.evaluation_results.get(model_id)
+
+    def get_execution_stats(self) -> Dict[str, Any]:
+        """Get service execution statistics"""
+        return {
+            **self.execution_stats,
+            'success_rate': (
+                self.execution_stats['successful_evaluation_operations'] /
+                max(1, self.execution_stats['total_evaluation_operations'])
+            )
+        }
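For orientation, a minimal usage sketch of the newly added ModelEvaluationService. It is not part of the package diff: the _PassthroughProcessor stand-in, the toy scikit-learn dataset and model, and the "demo-rf-001" identifier are illustrative assumptions for objects that the pipeline's training step would normally supply in model_info.

    import pandas as pd
    from sklearn.datasets import load_breast_cancer
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    from isa_model.inference.legacy_services.model_evaluation import ModelEvaluationService


    class _PassthroughProcessor:
        """Stand-in processor: the service only checks for a `models` attribute
        and optionally calls `_basic_preprocessing`, so this does nothing."""
        models = {}

        def _basic_preprocessing(self, X):
            return X


    # Toy data and model purely for illustration
    data = load_breast_cancer(as_frame=True)
    train_df, test_df = train_test_split(data.frame, test_size=0.25, random_state=0)
    model = RandomForestClassifier(random_state=0).fit(
        train_df.drop(columns=["target"]), train_df["target"]
    )

    model_info = {
        "model_id": "demo-rf-001",                       # hypothetical identifier
        "processor": _PassthroughProcessor(),
        "model_instance": model,
        "problem_type": "classification",
        "training_config": {"algorithm": "random_forest"},
    }

    service = ModelEvaluationService()
    result = service.evaluate_model(
        model_info,
        test_data=test_df,
        target_column="target",
        evaluation_config={"perform_cv": True, "cv_folds": 5},
    )

    print(result.success, result.evaluation_metrics.get("accuracy"))
    print(result.recommendations)
    print(service.get_execution_stats())                 # counts and average evaluation time

evaluate_model returns the EvaluationResult dataclass defined above; compare_models and analyze_model_performance follow the same model_info and test-data conventions.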