isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,24 +0,0 @@
1
- """
2
- Infrastructure components for evaluation framework.
3
-
4
- Provides robust infrastructure for production-scale evaluation:
5
- - Async execution and concurrency management
6
- - Distributed evaluation support
7
- - Experiment tracking integration
8
- - Result storage and caching
9
- - Resource monitoring
10
- """
11
-
12
- from .experiment_tracker import ExperimentTracker, WandBTracker, MLflowTracker
13
- from .async_runner import AsyncEvaluationRunner
14
- from .result_storage import ResultStorage
15
- from .cache_manager import CacheManager
16
-
17
- __all__ = [
18
- "ExperimentTracker",
19
- "WandBTracker",
20
- "MLflowTracker",
21
- "AsyncEvaluationRunner",
22
- "ResultStorage",
23
- "CacheManager"
24
- ]
@@ -1,466 +0,0 @@
1
- """
2
- Experiment tracking infrastructure with W&B and MLflow integration.
3
-
4
- Implements industry best practices for ML experiment tracking:
5
- - Automatic metric logging and visualization
6
- - Hyperparameter tracking and optimization
7
- - Model artifact management
8
- - Distributed experiment coordination
9
- - Cost and resource tracking
10
- """
11
-
12
- import logging
13
- import asyncio
14
- from abc import ABC, abstractmethod
15
- from typing import Dict, Any, Optional, List
16
- from datetime import datetime
17
- import json
18
-
19
- try:
20
- import wandb
21
- WANDB_AVAILABLE = True
22
- except ImportError:
23
- WANDB_AVAILABLE = False
24
-
25
- try:
26
- import mlflow
27
- import mlflow.tracking
28
- MLFLOW_AVAILABLE = True
29
- except ImportError:
30
- MLFLOW_AVAILABLE = False
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- class ExperimentTracker(ABC):
36
- """
37
- Abstract base class for experiment tracking systems.
38
-
39
- Provides unified interface for different tracking backends.
40
- """
41
-
42
- def __init__(self, config: Optional[Dict[str, Any]] = None):
43
- """
44
- Initialize experiment tracker.
45
-
46
- Args:
47
- config: Tracker configuration
48
- """
49
- self.config = config or {}
50
- self.active_run_id: Optional[str] = None
51
- self.is_running = False
52
-
53
- @abstractmethod
54
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
55
- """
56
- Start a new experiment run.
57
-
58
- Args:
59
- name: Run name
60
- config: Run configuration
61
-
62
- Returns:
63
- Run ID
64
- """
65
- pass
66
-
67
- @abstractmethod
68
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
69
- """
70
- Log metrics to the experiment tracker.
71
-
72
- Args:
73
- metrics: Metrics to log
74
- step: Optional step number
75
- """
76
- pass
77
-
78
- @abstractmethod
79
- async def log_params(self, params: Dict[str, Any]) -> None:
80
- """
81
- Log parameters to the experiment tracker.
82
-
83
- Args:
84
- params: Parameters to log
85
- """
86
- pass
87
-
88
- @abstractmethod
89
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
90
- """
91
- Log artifacts to the experiment tracker.
92
-
93
- Args:
94
- artifacts: Artifacts to log
95
- """
96
- pass
97
-
98
- @abstractmethod
99
- async def end_run(self) -> None:
100
- """End the current experiment run."""
101
- pass
102
-
103
- def get_run_id(self) -> Optional[str]:
104
- """Get current run ID."""
105
- return self.active_run_id
106
-
107
-
108
- class WandBTracker(ExperimentTracker):
109
- """
110
- Weights & Biases experiment tracker.
111
-
112
- Features:
113
- - Real-time metric visualization
114
- - Hyperparameter sweeps
115
- - Model artifact tracking
116
- - Team collaboration
117
- """
118
-
119
- def __init__(self,
120
- project: str,
121
- entity: Optional[str] = None,
122
- config: Optional[Dict[str, Any]] = None):
123
- """
124
- Initialize W&B tracker.
125
-
126
- Args:
127
- project: W&B project name
128
- entity: W&B entity (team) name
129
- config: Additional configuration
130
- """
131
- super().__init__(config)
132
-
133
- if not WANDB_AVAILABLE:
134
- raise ImportError("wandb is not installed. Install with: pip install wandb")
135
-
136
- self.project = project
137
- self.entity = entity
138
- self.run = None
139
-
140
- logger.info(f"Initialized W&B tracker for project: {project}")
141
-
142
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
143
- """Start a new W&B run."""
144
- try:
145
- # Initialize wandb run
146
- self.run = wandb.init(
147
- project=self.project,
148
- entity=self.entity,
149
- name=name,
150
- config=config,
151
- reinit=True
152
- )
153
-
154
- self.active_run_id = self.run.id
155
- self.is_running = True
156
-
157
- logger.info(f"Started W&B run: {name} (ID: {self.active_run_id})")
158
- return self.active_run_id
159
-
160
- except Exception as e:
161
- logger.error(f"Failed to start W&B run: {e}")
162
- raise
163
-
164
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
165
- """Log metrics to W&B."""
166
- if not self.is_running or not self.run:
167
- logger.warning("No active W&B run for logging metrics")
168
- return
169
-
170
- try:
171
- # Filter out non-numeric values
172
- numeric_metrics = {k: v for k, v in metrics.items()
173
- if isinstance(v, (int, float)) and not str(v).lower() in ['nan', 'inf', '-inf']}
174
-
175
- if numeric_metrics:
176
- self.run.log(numeric_metrics, step=step)
177
- logger.debug(f"Logged {len(numeric_metrics)} metrics to W&B")
178
-
179
- except Exception as e:
180
- logger.error(f"Failed to log metrics to W&B: {e}")
181
-
182
- async def log_params(self, params: Dict[str, Any]) -> None:
183
- """Log parameters to W&B."""
184
- if not self.is_running or not self.run:
185
- logger.warning("No active W&B run for logging params")
186
- return
187
-
188
- try:
189
- # W&B config is set during init, but we can update it
190
- for key, value in params.items():
191
- self.run.config[key] = value
192
-
193
- logger.debug(f"Logged {len(params)} parameters to W&B")
194
-
195
- except Exception as e:
196
- logger.error(f"Failed to log parameters to W&B: {e}")
197
-
198
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
199
- """Log artifacts to W&B."""
200
- if not self.is_running or not self.run:
201
- logger.warning("No active W&B run for logging artifacts")
202
- return
203
-
204
- try:
205
- for name, artifact in artifacts.items():
206
- if isinstance(artifact, str):
207
- # File path
208
- self.run.save(artifact, base_path=".")
209
- elif isinstance(artifact, dict):
210
- # Save as JSON
211
- artifact_path = f"{name}.json"
212
- with open(artifact_path, 'w') as f:
213
- json.dump(artifact, f, indent=2)
214
- self.run.save(artifact_path)
215
-
216
- logger.debug(f"Logged {len(artifacts)} artifacts to W&B")
217
-
218
- except Exception as e:
219
- logger.error(f"Failed to log artifacts to W&B: {e}")
220
-
221
- async def end_run(self) -> None:
222
- """End the current W&B run."""
223
- if self.run:
224
- try:
225
- self.run.finish()
226
- logger.info(f"Ended W&B run: {self.active_run_id}")
227
- except Exception as e:
228
- logger.error(f"Failed to end W&B run: {e}")
229
- finally:
230
- self.run = None
231
- self.active_run_id = None
232
- self.is_running = False
233
-
234
-
235
- class MLflowTracker(ExperimentTracker):
236
- """
237
- MLflow experiment tracker.
238
-
239
- Features:
240
- - Model lifecycle management
241
- - Experiment comparison
242
- - Model registry integration
243
- - Production deployment tracking
244
- """
245
-
246
- def __init__(self,
247
- experiment_name: str,
248
- tracking_uri: Optional[str] = None,
249
- config: Optional[Dict[str, Any]] = None):
250
- """
251
- Initialize MLflow tracker.
252
-
253
- Args:
254
- experiment_name: MLflow experiment name
255
- tracking_uri: MLflow tracking server URI
256
- config: Additional configuration
257
- """
258
- super().__init__(config)
259
-
260
- if not MLFLOW_AVAILABLE:
261
- raise ImportError("mlflow is not installed. Install with: pip install mlflow")
262
-
263
- self.experiment_name = experiment_name
264
-
265
- # Set tracking URI if provided
266
- if tracking_uri:
267
- mlflow.set_tracking_uri(tracking_uri)
268
-
269
- # Get or create experiment
270
- try:
271
- self.experiment = mlflow.get_experiment_by_name(experiment_name)
272
- if self.experiment is None:
273
- experiment_id = mlflow.create_experiment(experiment_name)
274
- self.experiment = mlflow.get_experiment(experiment_id)
275
- except Exception as e:
276
- logger.error(f"Failed to initialize MLflow experiment: {e}")
277
- raise
278
-
279
- logger.info(f"Initialized MLflow tracker for experiment: {experiment_name}")
280
-
281
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
282
- """Start a new MLflow run."""
283
- try:
284
- mlflow.start_run(
285
- experiment_id=self.experiment.experiment_id,
286
- run_name=name
287
- )
288
-
289
- run = mlflow.active_run()
290
- self.active_run_id = run.info.run_id
291
- self.is_running = True
292
-
293
- # Log initial config
294
- await self.log_params(config)
295
-
296
- logger.info(f"Started MLflow run: {name} (ID: {self.active_run_id})")
297
- return self.active_run_id
298
-
299
- except Exception as e:
300
- logger.error(f"Failed to start MLflow run: {e}")
301
- raise
302
-
303
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
304
- """Log metrics to MLflow."""
305
- if not self.is_running:
306
- logger.warning("No active MLflow run for logging metrics")
307
- return
308
-
309
- try:
310
- for key, value in metrics.items():
311
- if isinstance(value, (int, float)) and not str(value).lower() in ['nan', 'inf', '-inf']:
312
- mlflow.log_metric(key, value, step=step)
313
-
314
- logger.debug(f"Logged {len(metrics)} metrics to MLflow")
315
-
316
- except Exception as e:
317
- logger.error(f"Failed to log metrics to MLflow: {e}")
318
-
319
- async def log_params(self, params: Dict[str, Any]) -> None:
320
- """Log parameters to MLflow."""
321
- if not self.is_running:
322
- logger.warning("No active MLflow run for logging params")
323
- return
324
-
325
- try:
326
- # Convert complex objects to strings
327
- str_params = {}
328
- for key, value in params.items():
329
- if isinstance(value, (dict, list)):
330
- str_params[key] = json.dumps(value)
331
- else:
332
- str_params[key] = str(value)
333
-
334
- mlflow.log_params(str_params)
335
- logger.debug(f"Logged {len(params)} parameters to MLflow")
336
-
337
- except Exception as e:
338
- logger.error(f"Failed to log parameters to MLflow: {e}")
339
-
340
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
341
- """Log artifacts to MLflow."""
342
- if not self.is_running:
343
- logger.warning("No active MLflow run for logging artifacts")
344
- return
345
-
346
- try:
347
- for name, artifact in artifacts.items():
348
- if isinstance(artifact, str):
349
- # File path
350
- mlflow.log_artifact(artifact)
351
- elif isinstance(artifact, dict):
352
- # Save as JSON and log
353
- artifact_path = f"{name}.json"
354
- with open(artifact_path, 'w') as f:
355
- json.dump(artifact, f, indent=2)
356
- mlflow.log_artifact(artifact_path)
357
-
358
- logger.debug(f"Logged {len(artifacts)} artifacts to MLflow")
359
-
360
- except Exception as e:
361
- logger.error(f"Failed to log artifacts to MLflow: {e}")
362
-
363
- async def end_run(self) -> None:
364
- """End the current MLflow run."""
365
- if self.is_running:
366
- try:
367
- mlflow.end_run()
368
- logger.info(f"Ended MLflow run: {self.active_run_id}")
369
- except Exception as e:
370
- logger.error(f"Failed to end MLflow run: {e}")
371
- finally:
372
- self.active_run_id = None
373
- self.is_running = False
374
-
375
-
376
- class MultiTracker(ExperimentTracker):
377
- """
378
- Multi-backend experiment tracker.
379
-
380
- Logs to multiple tracking systems simultaneously for redundancy.
381
- """
382
-
383
- def __init__(self, trackers: List[ExperimentTracker]):
384
- """
385
- Initialize multi-tracker.
386
-
387
- Args:
388
- trackers: List of tracker instances
389
- """
390
- super().__init__()
391
- self.trackers = trackers
392
- logger.info(f"Initialized multi-tracker with {len(trackers)} backends")
393
-
394
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
395
- """Start runs on all trackers."""
396
- run_ids = []
397
-
398
- for tracker in self.trackers:
399
- try:
400
- run_id = await tracker.start_run(name, config)
401
- run_ids.append(run_id)
402
- except Exception as e:
403
- logger.error(f"Failed to start run on {type(tracker).__name__}: {e}")
404
-
405
- self.is_running = len(run_ids) > 0
406
- self.active_run_id = run_ids[0] if run_ids else None
407
-
408
- return self.active_run_id or "multi_tracker_run"
409
-
410
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
411
- """Log metrics to all trackers."""
412
- tasks = []
413
- for tracker in self.trackers:
414
- tasks.append(tracker.log_metrics(metrics, step))
415
-
416
- # Run all logging tasks concurrently
417
- await asyncio.gather(*tasks, return_exceptions=True)
418
-
419
- async def log_params(self, params: Dict[str, Any]) -> None:
420
- """Log parameters to all trackers."""
421
- tasks = []
422
- for tracker in self.trackers:
423
- tasks.append(tracker.log_params(params))
424
-
425
- await asyncio.gather(*tasks, return_exceptions=True)
426
-
427
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
428
- """Log artifacts to all trackers."""
429
- tasks = []
430
- for tracker in self.trackers:
431
- tasks.append(tracker.log_artifacts(artifacts))
432
-
433
- await asyncio.gather(*tasks, return_exceptions=True)
434
-
435
- async def end_run(self) -> None:
436
- """End runs on all trackers."""
437
- tasks = []
438
- for tracker in self.trackers:
439
- tasks.append(tracker.end_run())
440
-
441
- await asyncio.gather(*tasks, return_exceptions=True)
442
-
443
- self.is_running = False
444
- self.active_run_id = None
445
-
446
-
447
- def create_experiment_tracker(tracker_type: str, **kwargs) -> ExperimentTracker:
448
- """
449
- Factory function to create experiment trackers.
450
-
451
- Args:
452
- tracker_type: Type of tracker ("wandb", "mlflow", "multi")
453
- **kwargs: Tracker-specific configuration
454
-
455
- Returns:
456
- Configured experiment tracker
457
- """
458
- if tracker_type.lower() == "wandb":
459
- return WandBTracker(**kwargs)
460
- elif tracker_type.lower() == "mlflow":
461
- return MLflowTracker(**kwargs)
462
- elif tracker_type.lower() == "multi":
463
- trackers = kwargs.get("trackers", [])
464
- return MultiTracker(trackers)
465
- else:
466
- raise ValueError(f"Unknown tracker type: {tracker_type}")