isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,466 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Experiment tracking infrastructure with W&B and MLflow integration.
|
3
|
-
|
4
|
-
Implements industry best practices for ML experiment tracking:
|
5
|
-
- Automatic metric logging and visualization
|
6
|
-
- Hyperparameter tracking and optimization
|
7
|
-
- Model artifact management
|
8
|
-
- Distributed experiment coordination
|
9
|
-
- Cost and resource tracking
|
10
|
-
"""
|
11
|
-
|
12
|
-
import logging
|
13
|
-
import asyncio
|
14
|
-
from abc import ABC, abstractmethod
|
15
|
-
from typing import Dict, Any, Optional, List
|
16
|
-
from datetime import datetime
|
17
|
-
import json
|
18
|
-
|
19
|
-
try:
|
20
|
-
import wandb
|
21
|
-
WANDB_AVAILABLE = True
|
22
|
-
except ImportError:
|
23
|
-
WANDB_AVAILABLE = False
|
24
|
-
|
25
|
-
try:
|
26
|
-
import mlflow
|
27
|
-
import mlflow.tracking
|
28
|
-
MLFLOW_AVAILABLE = True
|
29
|
-
except ImportError:
|
30
|
-
MLFLOW_AVAILABLE = False
|
31
|
-
|
32
|
-
logger = logging.getLogger(__name__)
|
33
|
-
|
34
|
-
|
35
|
-
class ExperimentTracker(ABC):
    """
    Common contract shared by every experiment-tracking backend.

    Subclasses implement the run lifecycle (``start_run`` / ``end_run``)
    and the three logging hooks. This base class only stores the
    configuration and the bookkeeping attributes they all share.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Set up the state shared by all trackers.

        Args:
            config: Tracker configuration. A falsy value (``None`` or an
                empty dict) is replaced by a new empty dict.
        """
        self.config = config if config else {}
        # Identifier of the run currently in progress, if any.
        self.active_run_id: Optional[str] = None
        # Flag for subclasses to maintain; starts out False.
        self.is_running = False

    @abstractmethod
    async def start_run(self, name: str, config: Dict[str, Any]) -> str:
        """
        Begin a new experiment run.

        Args:
            name: Run name
            config: Run configuration

        Returns:
            Run ID
        """

    @abstractmethod
    async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Record metrics with the tracking backend.

        Args:
            metrics: Metrics to log
            step: Optional step number
        """

    @abstractmethod
    async def log_params(self, params: Dict[str, Any]) -> None:
        """
        Record parameters with the tracking backend.

        Args:
            params: Parameters to log
        """

    @abstractmethod
    async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
        """
        Record artifacts with the tracking backend.

        Args:
            artifacts: Artifacts to log
        """

    @abstractmethod
    async def end_run(self) -> None:
        """Finish the run that is currently in progress."""

    def get_run_id(self) -> Optional[str]:
        """Return the ID of the current run, or ``None`` when none is set."""
        return self.active_run_id
|
106
|
-
|
107
|
-
|
108
|
-
class WandBTracker(ExperimentTracker):
    """
    Weights & Biases experiment tracker.

    Wraps a single ``wandb`` run: ``start_run`` calls ``wandb.init`` and the
    ``log_*`` methods forward to the run object it returns. Failures inside
    the ``log_*`` methods and ``end_run`` are logged and swallowed; only
    ``start_run`` re-raises.
    """

    def __init__(self,
                 project: str,
                 entity: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Initialize W&B tracker.

        Args:
            project: W&B project name
            entity: W&B entity (team) name
            config: Additional configuration

        Raises:
            ImportError: If the ``wandb`` package could not be imported.
        """
        super().__init__(config)

        if not WANDB_AVAILABLE:
            raise ImportError("wandb is not installed. Install with: pip install wandb")

        self.project = project
        self.entity = entity
        # Run object returned by wandb.init; None while no run is open.
        self.run = None

        logger.info(f"Initialized W&B tracker for project: {project}")

    @staticmethod
    def _is_loggable_number(value: Any) -> bool:
        """Return True for ints/floats that are neither NaN nor +/-infinity."""
        if not isinstance(value, (int, float)):
            return False
        # NaN is the only value that compares unequal to itself.
        return value == value and value not in (float("inf"), float("-inf"))

    async def start_run(self, name: str, config: Dict[str, Any]) -> str:
        """
        Start a new W&B run.

        Args:
            name: Run name
            config: Run configuration passed to ``wandb.init``

        Returns:
            The ID of the run that was created.

        Raises:
            Exception: Whatever ``wandb.init`` raised, after logging it.
        """
        try:
            self.run = wandb.init(
                project=self.project,
                entity=self.entity,
                name=name,
                config=config,
                reinit=True
            )

            self.active_run_id = self.run.id
            self.is_running = True

            logger.info(f"Started W&B run: {name} (ID: {self.active_run_id})")
            return self.active_run_id

        except Exception as e:
            logger.error(f"Failed to start W&B run: {e}")
            raise

    async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log metrics to W&B.

        Non-numeric, NaN and infinite values are dropped before logging.

        Args:
            metrics: Metrics to log
            step: Optional step number
        """
        if not self.is_running or not self.run:
            logger.warning("No active W&B run for logging metrics")
            return

        try:
            numeric_metrics = {
                key: value
                for key, value in metrics.items()
                if self._is_loggable_number(value)
            }

            if numeric_metrics:
                self.run.log(numeric_metrics, step=step)
                logger.debug(f"Logged {len(numeric_metrics)} metrics to W&B")

        except Exception as e:
            logger.error(f"Failed to log metrics to W&B: {e}")

    async def log_params(self, params: Dict[str, Any]) -> None:
        """
        Log parameters to W&B by writing them into the run's config.

        Args:
            params: Parameters to log
        """
        if not self.is_running or not self.run:
            logger.warning("No active W&B run for logging params")
            return

        try:
            # The config is first set in wandb.init; later values are added
            # key by key.
            for key, value in params.items():
                self.run.config[key] = value

            logger.debug(f"Logged {len(params)} parameters to W&B")

        except Exception as e:
            logger.error(f"Failed to log parameters to W&B: {e}")

    async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
        """
        Log artifacts to W&B.

        A ``str`` value is treated as a file path and saved as is. A ``dict``
        value is dumped to ``<name>.json`` in the working directory and that
        file is saved. Any other type is skipped with a warning.

        Args:
            artifacts: Mapping of artifact name to file path or dict
        """
        if not self.is_running or not self.run:
            logger.warning("No active W&B run for logging artifacts")
            return

        try:
            logged = 0
            for name, artifact in artifacts.items():
                if isinstance(artifact, str):
                    # File path
                    self.run.save(artifact, base_path=".")
                elif isinstance(artifact, dict):
                    # NOTE(review): the JSON file is left in the working
                    # directory on purpose; run.save may sync it later, so
                    # deleting it here could lose the upload. Confirm before
                    # moving it to a temporary location.
                    artifact_path = f"{name}.json"
                    with open(artifact_path, "w", encoding="utf-8") as f:
                        json.dump(artifact, f, indent=2)
                    self.run.save(artifact_path)
                else:
                    # Previously dropped silently and still counted as logged.
                    logger.warning(
                        f"Skipping W&B artifact '{name}': "
                        f"unsupported type {type(artifact).__name__}"
                    )
                    continue
                logged += 1

            logger.debug(f"Logged {logged} artifacts to W&B")

        except Exception as e:
            logger.error(f"Failed to log artifacts to W&B: {e}")

    async def end_run(self) -> None:
        """End the current W&B run and reset the tracker state."""
        if self.run:
            try:
                self.run.finish()
                logger.info(f"Ended W&B run: {self.active_run_id}")
            except Exception as e:
                logger.error(f"Failed to end W&B run: {e}")
            finally:
                # Reset even when finish() failed so a new run can start.
                self.run = None
                self.active_run_id = None
                self.is_running = False
|
233
|
-
|
234
|
-
|
235
|
-
class MLflowTracker(ExperimentTracker):
    """
    MLflow experiment tracker.

    Resolves (or creates) an MLflow experiment at construction time and
    forwards the ``log_*`` calls to the module-level ``mlflow`` API.
    Failures inside the ``log_*`` methods and ``end_run`` are logged and
    swallowed; ``__init__`` and ``start_run`` re-raise.
    """

    def __init__(self,
                 experiment_name: str,
                 tracking_uri: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Initialize MLflow tracker.

        Args:
            experiment_name: MLflow experiment name
            tracking_uri: MLflow tracking server URI
            config: Additional configuration

        Raises:
            ImportError: If the ``mlflow`` package could not be imported.
            Exception: Whatever MLflow raised while looking up or creating
                the experiment, after logging it.
        """
        super().__init__(config)

        if not MLFLOW_AVAILABLE:
            raise ImportError("mlflow is not installed. Install with: pip install mlflow")

        self.experiment_name = experiment_name

        # Set tracking URI if provided
        if tracking_uri:
            mlflow.set_tracking_uri(tracking_uri)

        # Get or create experiment
        try:
            self.experiment = mlflow.get_experiment_by_name(experiment_name)
            if self.experiment is None:
                experiment_id = mlflow.create_experiment(experiment_name)
                self.experiment = mlflow.get_experiment(experiment_id)
        except Exception as e:
            logger.error(f"Failed to initialize MLflow experiment: {e}")
            raise

        logger.info(f"Initialized MLflow tracker for experiment: {experiment_name}")

    @staticmethod
    def _is_loggable_number(value: Any) -> bool:
        """Return True for ints/floats that are neither NaN nor +/-infinity."""
        if not isinstance(value, (int, float)):
            return False
        # NaN is the only value that compares unequal to itself.
        return value == value and value not in (float("inf"), float("-inf"))

    async def start_run(self, name: str, config: Dict[str, Any]) -> str:
        """
        Start a new MLflow run and log ``config`` as its parameters.

        Args:
            name: Run name
            config: Run configuration, logged through ``log_params``

        Returns:
            The ID of the run that was started.

        Raises:
            Exception: Whatever MLflow raised, after logging it.
        """
        try:
            mlflow.start_run(
                experiment_id=self.experiment.experiment_id,
                run_name=name
            )

            run = mlflow.active_run()
            self.active_run_id = run.info.run_id
            self.is_running = True

            # Log initial config
            await self.log_params(config)

            logger.info(f"Started MLflow run: {name} (ID: {self.active_run_id})")
            return self.active_run_id

        except Exception as e:
            logger.error(f"Failed to start MLflow run: {e}")
            raise

    async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log metrics to MLflow.

        Non-numeric, NaN and infinite values are skipped.

        Args:
            metrics: Metrics to log
            step: Optional step number
        """
        if not self.is_running:
            logger.warning("No active MLflow run for logging metrics")
            return

        try:
            logged = 0
            for key, value in metrics.items():
                if self._is_loggable_number(value):
                    mlflow.log_metric(key, value, step=step)
                    logged += 1

            # Report what was actually sent, not the size of the input.
            logger.debug(f"Logged {logged} metrics to MLflow")

        except Exception as e:
            logger.error(f"Failed to log metrics to MLflow: {e}")

    async def log_params(self, params: Dict[str, Any]) -> None:
        """
        Log parameters to MLflow.

        Dict and list values are serialised to JSON; everything else is
        converted with ``str()``.

        Args:
            params: Parameters to log
        """
        if not self.is_running:
            logger.warning("No active MLflow run for logging params")
            return

        try:
            str_params = {}
            for key, value in params.items():
                if isinstance(value, (dict, list)):
                    # default=str keeps one non-serialisable nested value
                    # from discarding every parameter; scalars are
                    # stringified below anyway.
                    str_params[key] = json.dumps(value, default=str)
                else:
                    str_params[key] = str(value)

            mlflow.log_params(str_params)
            logger.debug(f"Logged {len(params)} parameters to MLflow")

        except Exception as e:
            logger.error(f"Failed to log parameters to MLflow: {e}")

    async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
        """
        Log artifacts to MLflow.

        A ``str`` value is treated as a file path and logged as is. A ``dict``
        value is dumped to ``<name>.json`` in a temporary directory, logged,
        and the temporary copy is removed. Any other type is skipped with a
        warning.

        Args:
            artifacts: Mapping of artifact name to file path or dict
        """
        # Local imports: these modules are not imported at file level.
        import os
        import tempfile

        if not self.is_running:
            logger.warning("No active MLflow run for logging artifacts")
            return

        try:
            logged = 0
            for name, artifact in artifacts.items():
                if isinstance(artifact, str):
                    # File path
                    mlflow.log_artifact(artifact)
                elif isinstance(artifact, dict):
                    # Stage the JSON in a temporary directory so nothing is
                    # left behind in the working directory.
                    with tempfile.TemporaryDirectory() as tmp_dir:
                        artifact_path = os.path.join(tmp_dir, f"{name}.json")
                        # Names containing a path separator need their
                        # parent directory to exist.
                        os.makedirs(os.path.dirname(artifact_path), exist_ok=True)
                        with open(artifact_path, "w", encoding="utf-8") as f:
                            json.dump(artifact, f, indent=2)
                        mlflow.log_artifact(artifact_path)
                else:
                    # Previously dropped silently and still counted as logged.
                    logger.warning(
                        f"Skipping MLflow artifact '{name}': "
                        f"unsupported type {type(artifact).__name__}"
                    )
                    continue
                logged += 1

            logger.debug(f"Logged {logged} artifacts to MLflow")

        except Exception as e:
            logger.error(f"Failed to log artifacts to MLflow: {e}")

    async def end_run(self) -> None:
        """End the current MLflow run and reset the tracker state."""
        if self.is_running:
            try:
                mlflow.end_run()
                logger.info(f"Ended MLflow run: {self.active_run_id}")
            except Exception as e:
                logger.error(f"Failed to end MLflow run: {e}")
            finally:
                # Reset even when end_run() failed so a new run can start.
                self.active_run_id = None
                self.is_running = False
|
374
|
-
|
375
|
-
|
376
|
-
class MultiTracker(ExperimentTracker):
    """
    Multi-backend experiment tracker.

    Forwards every call to each tracker in ``trackers``. A failure in one
    backend is logged and does not stop the call reaching the others.
    """

    def __init__(self, trackers: List[ExperimentTracker]):
        """
        Initialize multi-tracker.

        Args:
            trackers: List of tracker instances
        """
        super().__init__()
        self.trackers = trackers
        logger.info(f"Initialized multi-tracker with {len(trackers)} backends")

    async def _broadcast(self, operation: str, *args: Any) -> None:
        """Call ``operation`` on every backend concurrently and log failures."""
        results = await asyncio.gather(
            *(getattr(tracker, operation)(*args) for tracker in self.trackers),
            return_exceptions=True,
        )
        # gather() returns exceptions as results; without this loop a
        # failing backend would go unnoticed.
        for tracker, result in zip(self.trackers, results):
            if isinstance(result, BaseException):
                logger.error(f"{operation} failed on {type(tracker).__name__}: {result}")

    async def start_run(self, name: str, config: Dict[str, Any]) -> str:
        """
        Start runs on all trackers, one after another.

        Args:
            name: Run name
            config: Run configuration

        Returns:
            The run ID of the first backend that started successfully, or
            ``"multi_tracker_run"`` when none did.
        """
        run_ids = []

        for tracker in self.trackers:
            try:
                run_ids.append(await tracker.start_run(name, config))
            except Exception as e:
                logger.error(f"Failed to start run on {type(tracker).__name__}: {e}")

        self.is_running = bool(run_ids)
        self.active_run_id = run_ids[0] if run_ids else None

        return self.active_run_id or "multi_tracker_run"

    async def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log metrics to all trackers.

        Args:
            metrics: Metrics to log
            step: Optional step number
        """
        await self._broadcast("log_metrics", metrics, step)

    async def log_params(self, params: Dict[str, Any]) -> None:
        """
        Log parameters to all trackers.

        Args:
            params: Parameters to log
        """
        await self._broadcast("log_params", params)

    async def log_artifacts(self, artifacts: Dict[str, Any]) -> None:
        """
        Log artifacts to all trackers.

        Args:
            artifacts: Artifacts to log
        """
        await self._broadcast("log_artifacts", artifacts)

    async def end_run(self) -> None:
        """End runs on all trackers and reset this tracker's state."""
        await self._broadcast("end_run")

        self.is_running = False
        self.active_run_id = None
|
445
|
-
|
446
|
-
|
447
|
-
def create_experiment_tracker(tracker_type: str, **kwargs) -> ExperimentTracker:
    """
    Factory function to create experiment trackers.

    Args:
        tracker_type: Type of tracker ("wandb", "mlflow", "multi"),
            matched case-insensitively
        **kwargs: Tracker-specific configuration. For "wandb" and "mlflow"
            they are passed to the tracker's constructor. For "multi" only
            the ``trackers`` entry is read.

    Returns:
        Configured experiment tracker

    Raises:
        ValueError: If ``tracker_type`` is not one of the supported types.
    """
    kind = tracker_type.lower()

    if kind == "wandb":
        return WandBTracker(**kwargs)
    if kind == "mlflow":
        return MLflowTracker(**kwargs)
    if kind == "multi":
        # `or []` also covers an explicit trackers=None, which would
        # otherwise fail inside MultiTracker.__init__ on len(None).
        return MultiTracker(kwargs.get("trackers") or [])

    raise ValueError(f"Unknown tracker type: {tracker_type}")
|