isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,466 +0,0 @@
1
- """
2
- Experiment tracking infrastructure with W&B and MLflow integration.
3
-
4
- Implements industry best practices for ML experiment tracking:
5
- - Automatic metric logging and visualization
6
- - Hyperparameter tracking and optimization
7
- - Model artifact management
8
- - Distributed experiment coordination
9
- - Cost and resource tracking
10
- """
11
-
12
- import logging
13
- import asyncio
14
- from abc import ABC, abstractmethod
15
- from typing import Dict, Any, Optional, List
16
- from datetime import datetime
17
- import json
18
-
19
- try:
20
- import wandb
21
- WANDB_AVAILABLE = True
22
- except ImportError:
23
- WANDB_AVAILABLE = False
24
-
25
- try:
26
- import mlflow
27
- import mlflow.tracking
28
- MLFLOW_AVAILABLE = True
29
- except ImportError:
30
- MLFLOW_AVAILABLE = False
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- class ExperimentTracker(ABC):
36
- """
37
- Abstract base class for experiment tracking systems.
38
-
39
- Provides unified interface for different tracking backends.
40
- """
41
-
42
- def __init__(self, config: Optional[Dict[str, Any]] = None):
43
- """
44
- Initialize experiment tracker.
45
-
46
- Args:
47
- config: Tracker configuration
48
- """
49
- self.config = config or {}
50
- self.active_run_id: Optional[str] = None
51
- self.is_running = False
52
-
53
- @abstractmethod
54
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
55
- """
56
- Start a new experiment run.
57
-
58
- Args:
59
- name: Run name
60
- config: Run configuration
61
-
62
- Returns:
63
- Run ID
64
- """
65
- pass
66
-
67
- @abstractmethod
68
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
69
- """
70
- Log metrics to the experiment tracker.
71
-
72
- Args:
73
- metrics: Metrics to log
74
- step: Optional step number
75
- """
76
- pass
77
-
78
- @abstractmethod
79
- async def log_params(self, params: Dict[str, Any]) -> None:
80
- """
81
- Log parameters to the experiment tracker.
82
-
83
- Args:
84
- params: Parameters to log
85
- """
86
- pass
87
-
88
- @abstractmethod
89
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
90
- """
91
- Log artifacts to the experiment tracker.
92
-
93
- Args:
94
- artifacts: Artifacts to log
95
- """
96
- pass
97
-
98
- @abstractmethod
99
- async def end_run(self) -> None:
100
- """End the current experiment run."""
101
- pass
102
-
103
- def get_run_id(self) -> Optional[str]:
104
- """Get current run ID."""
105
- return self.active_run_id
106
-
107
-
108
- class WandBTracker(ExperimentTracker):
109
- """
110
- Weights & Biases experiment tracker.
111
-
112
- Features:
113
- - Real-time metric visualization
114
- - Hyperparameter sweeps
115
- - Model artifact tracking
116
- - Team collaboration
117
- """
118
-
119
- def __init__(self,
120
- project: str,
121
- entity: Optional[str] = None,
122
- config: Optional[Dict[str, Any]] = None):
123
- """
124
- Initialize W&B tracker.
125
-
126
- Args:
127
- project: W&B project name
128
- entity: W&B entity (team) name
129
- config: Additional configuration
130
- """
131
- super().__init__(config)
132
-
133
- if not WANDB_AVAILABLE:
134
- raise ImportError("wandb is not installed. Install with: pip install wandb")
135
-
136
- self.project = project
137
- self.entity = entity
138
- self.run = None
139
-
140
- logger.info(f"Initialized W&B tracker for project: {project}")
141
-
142
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
143
- """Start a new W&B run."""
144
- try:
145
- # Initialize wandb run
146
- self.run = wandb.init(
147
- project=self.project,
148
- entity=self.entity,
149
- name=name,
150
- config=config,
151
- reinit=True
152
- )
153
-
154
- self.active_run_id = self.run.id
155
- self.is_running = True
156
-
157
- logger.info(f"Started W&B run: {name} (ID: {self.active_run_id})")
158
- return self.active_run_id
159
-
160
- except Exception as e:
161
- logger.error(f"Failed to start W&B run: {e}")
162
- raise
163
-
164
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
165
- """Log metrics to W&B."""
166
- if not self.is_running or not self.run:
167
- logger.warning("No active W&B run for logging metrics")
168
- return
169
-
170
- try:
171
- # Filter out non-numeric values
172
- numeric_metrics = {k: v for k, v in metrics.items()
173
- if isinstance(v, (int, float)) and not str(v).lower() in ['nan', 'inf', '-inf']}
174
-
175
- if numeric_metrics:
176
- self.run.log(numeric_metrics, step=step)
177
- logger.debug(f"Logged {len(numeric_metrics)} metrics to W&B")
178
-
179
- except Exception as e:
180
- logger.error(f"Failed to log metrics to W&B: {e}")
181
-
182
- async def log_params(self, params: Dict[str, Any]) -> None:
183
- """Log parameters to W&B."""
184
- if not self.is_running or not self.run:
185
- logger.warning("No active W&B run for logging params")
186
- return
187
-
188
- try:
189
- # W&B config is set during init, but we can update it
190
- for key, value in params.items():
191
- self.run.config[key] = value
192
-
193
- logger.debug(f"Logged {len(params)} parameters to W&B")
194
-
195
- except Exception as e:
196
- logger.error(f"Failed to log parameters to W&B: {e}")
197
-
198
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
199
- """Log artifacts to W&B."""
200
- if not self.is_running or not self.run:
201
- logger.warning("No active W&B run for logging artifacts")
202
- return
203
-
204
- try:
205
- for name, artifact in artifacts.items():
206
- if isinstance(artifact, str):
207
- # File path
208
- self.run.save(artifact, base_path=".")
209
- elif isinstance(artifact, dict):
210
- # Save as JSON
211
- artifact_path = f"{name}.json"
212
- with open(artifact_path, 'w') as f:
213
- json.dump(artifact, f, indent=2)
214
- self.run.save(artifact_path)
215
-
216
- logger.debug(f"Logged {len(artifacts)} artifacts to W&B")
217
-
218
- except Exception as e:
219
- logger.error(f"Failed to log artifacts to W&B: {e}")
220
-
221
- async def end_run(self) -> None:
222
- """End the current W&B run."""
223
- if self.run:
224
- try:
225
- self.run.finish()
226
- logger.info(f"Ended W&B run: {self.active_run_id}")
227
- except Exception as e:
228
- logger.error(f"Failed to end W&B run: {e}")
229
- finally:
230
- self.run = None
231
- self.active_run_id = None
232
- self.is_running = False
233
-
234
-
235
- class MLflowTracker(ExperimentTracker):
236
- """
237
- MLflow experiment tracker.
238
-
239
- Features:
240
- - Model lifecycle management
241
- - Experiment comparison
242
- - Model registry integration
243
- - Production deployment tracking
244
- """
245
-
246
- def __init__(self,
247
- experiment_name: str,
248
- tracking_uri: Optional[str] = None,
249
- config: Optional[Dict[str, Any]] = None):
250
- """
251
- Initialize MLflow tracker.
252
-
253
- Args:
254
- experiment_name: MLflow experiment name
255
- tracking_uri: MLflow tracking server URI
256
- config: Additional configuration
257
- """
258
- super().__init__(config)
259
-
260
- if not MLFLOW_AVAILABLE:
261
- raise ImportError("mlflow is not installed. Install with: pip install mlflow")
262
-
263
- self.experiment_name = experiment_name
264
-
265
- # Set tracking URI if provided
266
- if tracking_uri:
267
- mlflow.set_tracking_uri(tracking_uri)
268
-
269
- # Get or create experiment
270
- try:
271
- self.experiment = mlflow.get_experiment_by_name(experiment_name)
272
- if self.experiment is None:
273
- experiment_id = mlflow.create_experiment(experiment_name)
274
- self.experiment = mlflow.get_experiment(experiment_id)
275
- except Exception as e:
276
- logger.error(f"Failed to initialize MLflow experiment: {e}")
277
- raise
278
-
279
- logger.info(f"Initialized MLflow tracker for experiment: {experiment_name}")
280
-
281
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
282
- """Start a new MLflow run."""
283
- try:
284
- mlflow.start_run(
285
- experiment_id=self.experiment.experiment_id,
286
- run_name=name
287
- )
288
-
289
- run = mlflow.active_run()
290
- self.active_run_id = run.info.run_id
291
- self.is_running = True
292
-
293
- # Log initial config
294
- await self.log_params(config)
295
-
296
- logger.info(f"Started MLflow run: {name} (ID: {self.active_run_id})")
297
- return self.active_run_id
298
-
299
- except Exception as e:
300
- logger.error(f"Failed to start MLflow run: {e}")
301
- raise
302
-
303
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
304
- """Log metrics to MLflow."""
305
- if not self.is_running:
306
- logger.warning("No active MLflow run for logging metrics")
307
- return
308
-
309
- try:
310
- for key, value in metrics.items():
311
- if isinstance(value, (int, float)) and not str(value).lower() in ['nan', 'inf', '-inf']:
312
- mlflow.log_metric(key, value, step=step)
313
-
314
- logger.debug(f"Logged {len(metrics)} metrics to MLflow")
315
-
316
- except Exception as e:
317
- logger.error(f"Failed to log metrics to MLflow: {e}")
318
-
319
- async def log_params(self, params: Dict[str, Any]) -> None:
320
- """Log parameters to MLflow."""
321
- if not self.is_running:
322
- logger.warning("No active MLflow run for logging params")
323
- return
324
-
325
- try:
326
- # Convert complex objects to strings
327
- str_params = {}
328
- for key, value in params.items():
329
- if isinstance(value, (dict, list)):
330
- str_params[key] = json.dumps(value)
331
- else:
332
- str_params[key] = str(value)
333
-
334
- mlflow.log_params(str_params)
335
- logger.debug(f"Logged {len(params)} parameters to MLflow")
336
-
337
- except Exception as e:
338
- logger.error(f"Failed to log parameters to MLflow: {e}")
339
-
340
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
341
- """Log artifacts to MLflow."""
342
- if not self.is_running:
343
- logger.warning("No active MLflow run for logging artifacts")
344
- return
345
-
346
- try:
347
- for name, artifact in artifacts.items():
348
- if isinstance(artifact, str):
349
- # File path
350
- mlflow.log_artifact(artifact)
351
- elif isinstance(artifact, dict):
352
- # Save as JSON and log
353
- artifact_path = f"{name}.json"
354
- with open(artifact_path, 'w') as f:
355
- json.dump(artifact, f, indent=2)
356
- mlflow.log_artifact(artifact_path)
357
-
358
- logger.debug(f"Logged {len(artifacts)} artifacts to MLflow")
359
-
360
- except Exception as e:
361
- logger.error(f"Failed to log artifacts to MLflow: {e}")
362
-
363
- async def end_run(self) -> None:
364
- """End the current MLflow run."""
365
- if self.is_running:
366
- try:
367
- mlflow.end_run()
368
- logger.info(f"Ended MLflow run: {self.active_run_id}")
369
- except Exception as e:
370
- logger.error(f"Failed to end MLflow run: {e}")
371
- finally:
372
- self.active_run_id = None
373
- self.is_running = False
374
-
375
-
376
- class MultiTracker(ExperimentTracker):
377
- """
378
- Multi-backend experiment tracker.
379
-
380
- Logs to multiple tracking systems simultaneously for redundancy.
381
- """
382
-
383
- def __init__(self, trackers: List[ExperimentTracker]):
384
- """
385
- Initialize multi-tracker.
386
-
387
- Args:
388
- trackers: List of tracker instances
389
- """
390
- super().__init__()
391
- self.trackers = trackers
392
- logger.info(f"Initialized multi-tracker with {len(trackers)} backends")
393
-
394
- async def start_run(self, name: str, config: Dict[str, Any]) -> str:
395
- """Start runs on all trackers."""
396
- run_ids = []
397
-
398
- for tracker in self.trackers:
399
- try:
400
- run_id = await tracker.start_run(name, config)
401
- run_ids.append(run_id)
402
- except Exception as e:
403
- logger.error(f"Failed to start run on {type(tracker).__name__}: {e}")
404
-
405
- self.is_running = len(run_ids) > 0
406
- self.active_run_id = run_ids[0] if run_ids else None
407
-
408
- return self.active_run_id or "multi_tracker_run"
409
-
410
- async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
411
- """Log metrics to all trackers."""
412
- tasks = []
413
- for tracker in self.trackers:
414
- tasks.append(tracker.log_metrics(metrics, step))
415
-
416
- # Run all logging tasks concurrently
417
- await asyncio.gather(*tasks, return_exceptions=True)
418
-
419
- async def log_params(self, params: Dict[str, Any]) -> None:
420
- """Log parameters to all trackers."""
421
- tasks = []
422
- for tracker in self.trackers:
423
- tasks.append(tracker.log_params(params))
424
-
425
- await asyncio.gather(*tasks, return_exceptions=True)
426
-
427
- async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
428
- """Log artifacts to all trackers."""
429
- tasks = []
430
- for tracker in self.trackers:
431
- tasks.append(tracker.log_artifacts(artifacts))
432
-
433
- await asyncio.gather(*tasks, return_exceptions=True)
434
-
435
- async def end_run(self) -> None:
436
- """End runs on all trackers."""
437
- tasks = []
438
- for tracker in self.trackers:
439
- tasks.append(tracker.end_run())
440
-
441
- await asyncio.gather(*tasks, return_exceptions=True)
442
-
443
- self.is_running = False
444
- self.active_run_id = None
445
-
446
-
447
- def create_experiment_tracker(tracker_type: str, **kwargs) -> ExperimentTracker:
448
- """
449
- Factory function to create experiment trackers.
450
-
451
- Args:
452
- tracker_type: Type of tracker ("wandb", "mlflow", "multi")
453
- **kwargs: Tracker-specific configuration
454
-
455
- Returns:
456
- Configured experiment tracker
457
- """
458
- if tracker_type.lower() == "wandb":
459
- return WandBTracker(**kwargs)
460
- elif tracker_type.lower() == "mlflow":
461
- return MLflowTracker(**kwargs)
462
- elif tracker_type.lower() == "multi":
463
- trackers = kwargs.get("trackers", [])
464
- return MultiTracker(trackers)
465
- else:
466
- raise ValueError(f"Unknown tracker type: {tracker_type}")