isa-model 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +5 -0
- isa_model/core/model_manager.py +143 -0
- isa_model/core/model_registry.py +115 -0
- isa_model/core/model_router.py +226 -0
- isa_model/core/model_storage.py +133 -0
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +202 -0
- isa_model/core/storage/hf_storage.py +0 -0
- isa_model/core/storage/local_storage.py +0 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/mlflow_gateway/__init__.py +8 -0
- isa_model/deployment/mlflow_gateway/start_gateway.py +65 -0
- isa_model/deployment/unified_multimodal_client.py +341 -0
- isa_model/inference/__init__.py +11 -0
- isa_model/inference/adapter/triton_adapter.py +453 -0
- isa_model/inference/adapter/unified_api.py +248 -0
- isa_model/inference/ai_factory.py +354 -0
- isa_model/inference/backends/Pytorch/bge_embed_backend.py +188 -0
- isa_model/inference/backends/Pytorch/gemma_backend.py +167 -0
- isa_model/inference/backends/Pytorch/llama_backend.py +166 -0
- isa_model/inference/backends/Pytorch/whisper_backend.py +194 -0
- isa_model/inference/backends/__init__.py +53 -0
- isa_model/inference/backends/base_backend_client.py +26 -0
- isa_model/inference/backends/container_services.py +104 -0
- isa_model/inference/backends/local_services.py +72 -0
- isa_model/inference/backends/openai_client.py +130 -0
- isa_model/inference/backends/replicate_client.py +197 -0
- isa_model/inference/backends/third_party_services.py +239 -0
- isa_model/inference/backends/triton_client.py +97 -0
- isa_model/inference/base.py +46 -0
- isa_model/inference/client_sdk/__init__.py +0 -0
- isa_model/inference/client_sdk/client.py +134 -0
- isa_model/inference/client_sdk/client_data_std.py +34 -0
- isa_model/inference/client_sdk/client_sdk_schema.py +16 -0
- isa_model/inference/client_sdk/exceptions.py +0 -0
- isa_model/inference/engine/triton/model_repository/bge/1/model.py +174 -0
- isa_model/inference/engine/triton/model_repository/gemma/1/model.py +250 -0
- isa_model/inference/engine/triton/model_repository/llama/1/model.py +76 -0
- isa_model/inference/engine/triton/model_repository/whisper/1/model.py +195 -0
- isa_model/inference/providers/__init__.py +19 -0
- isa_model/inference/providers/base_provider.py +30 -0
- isa_model/inference/providers/model_cache_manager.py +341 -0
- isa_model/inference/providers/ollama_provider.py +73 -0
- isa_model/inference/providers/openai_provider.py +87 -0
- isa_model/inference/providers/replicate_provider.py +94 -0
- isa_model/inference/providers/triton_provider.py +439 -0
- isa_model/inference/providers/vllm_provider.py +0 -0
- isa_model/inference/providers/yyds_provider.py +83 -0
- isa_model/inference/services/__init__.py +14 -0
- isa_model/inference/services/audio/fish_speech/handler.py +215 -0
- isa_model/inference/services/audio/runpod_tts_fish_service.py +212 -0
- isa_model/inference/services/audio/triton_speech_service.py +138 -0
- isa_model/inference/services/audio/whisper_service.py +186 -0
- isa_model/inference/services/audio/yyds_audio_service.py +71 -0
- isa_model/inference/services/base_service.py +106 -0
- isa_model/inference/services/base_tts_service.py +66 -0
- isa_model/inference/services/embedding/bge_service.py +183 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +85 -0
- isa_model/inference/services/embedding/ollama_rerank_service.py +118 -0
- isa_model/inference/services/embedding/onnx_rerank_service.py +73 -0
- isa_model/inference/services/llm/__init__.py +16 -0
- isa_model/inference/services/llm/gemma_service.py +143 -0
- isa_model/inference/services/llm/llama_service.py +143 -0
- isa_model/inference/services/llm/ollama_llm_service.py +108 -0
- isa_model/inference/services/llm/openai_llm_service.py +129 -0
- isa_model/inference/services/llm/replicate_llm_service.py +179 -0
- isa_model/inference/services/llm/triton_llm_service.py +230 -0
- isa_model/inference/services/others/table_transformer_service.py +61 -0
- isa_model/inference/services/vision/__init__.py +12 -0
- isa_model/inference/services/vision/helpers/image_utils.py +58 -0
- isa_model/inference/services/vision/helpers/text_splitter.py +46 -0
- isa_model/inference/services/vision/ollama_vision_service.py +60 -0
- isa_model/inference/services/vision/replicate_vision_service.py +241 -0
- isa_model/inference/services/vision/triton_vision_service.py +199 -0
- isa_model/inference/services/vision/yyds_vision_service.py +80 -0
- isa_model/inference/utils/conversion/bge_rerank_convert.py +73 -0
- isa_model/inference/utils/conversion/onnx_converter.py +0 -0
- isa_model/inference/utils/conversion/torch_converter.py +0 -0
- isa_model/scripts/inference_tracker.py +283 -0
- isa_model/scripts/mlflow_manager.py +379 -0
- isa_model/scripts/model_registry.py +465 -0
- isa_model/scripts/start_mlflow.py +95 -0
- isa_model/scripts/training_tracker.py +257 -0
- isa_model/training/engine/llama_factory/__init__.py +39 -0
- isa_model/training/engine/llama_factory/config.py +115 -0
- isa_model/training/engine/llama_factory/data_adapter.py +284 -0
- isa_model/training/engine/llama_factory/examples/__init__.py +6 -0
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +185 -0
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +163 -0
- isa_model/training/engine/llama_factory/factory.py +331 -0
- isa_model/training/engine/llama_factory/rl.py +254 -0
- isa_model/training/engine/llama_factory/trainer.py +171 -0
- isa_model/training/image_model/configs/create_config.py +37 -0
- isa_model/training/image_model/configs/create_flux_config.py +26 -0
- isa_model/training/image_model/configs/create_lora_config.py +21 -0
- isa_model/training/image_model/prepare_massed_compute.py +97 -0
- isa_model/training/image_model/prepare_upload.py +17 -0
- isa_model/training/image_model/raw_data/create_captions.py +16 -0
- isa_model/training/image_model/raw_data/create_lora_captions.py +20 -0
- isa_model/training/image_model/raw_data/pre_processing.py +200 -0
- isa_model/training/image_model/train/train.py +42 -0
- isa_model/training/image_model/train/train_flux.py +41 -0
- isa_model/training/image_model/train/train_lora.py +57 -0
- isa_model/training/image_model/train_main.py +25 -0
- isa_model/training/llm_model/annotation/annotation_schema.py +47 -0
- isa_model/training/llm_model/annotation/processors/annotation_processor.py +126 -0
- isa_model/training/llm_model/annotation/storage/dataset_manager.py +131 -0
- isa_model/training/llm_model/annotation/storage/dataset_schema.py +44 -0
- isa_model/training/llm_model/annotation/tests/test_annotation_flow.py +109 -0
- isa_model/training/llm_model/annotation/tests/test_minio copy.py +113 -0
- isa_model/training/llm_model/annotation/tests/test_minio_upload.py +43 -0
- isa_model/training/llm_model/annotation/views/annotation_controller.py +158 -0
- isa_model-0.1.0.dist-info/METADATA +116 -0
- isa_model-0.1.0.dist-info/RECORD +117 -0
- isa_model-0.1.0.dist-info/WHEEL +5 -0
- isa_model-0.1.0.dist-info/licenses/LICENSE +21 -0
- isa_model-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
"""
|
2
|
+
MLflow manager for experiment tracking and model management.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import logging
|
7
|
+
from enum import Enum
|
8
|
+
from typing import Dict, List, Optional, Any, Union
|
9
|
+
import mlflow
|
10
|
+
from mlflow.tracking import MlflowClient
|
11
|
+
|
12
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class ExperimentType(str, Enum):
    """Closed set of experiment categories that can be tracked.

    Every member is also a ``str``, so it compares equal to its plain
    string value (for example ``ExperimentType.TRAINING == "training"``).
    """

    TRAINING = "training"
    FINETUNING = "finetuning"
    REINFORCEMENT_LEARNING = "rl"
    INFERENCE = "inference"
    EVALUATION = "evaluation"
|
23
|
+
|
24
|
+
|
25
|
+
class MLflowManager:
    """
    Manager class for MLflow operations.

    This class provides methods to set up MLflow, track experiments,
    log parameters, metrics, artifacts and models, query runs, and create
    model versions in the registry.

    Logging methods act on MLflow's currently active run; when no run is
    active they emit a warning and do nothing instead of raising.

    Example:
        ```python
        # Initialize MLflow manager
        mlflow_manager = MLflowManager(
            tracking_uri="http://localhost:5000",
            artifact_uri="s3://bucket/artifacts"
        )

        # Set up experiment and start run
        with mlflow_manager.start_run(
            experiment_type=ExperimentType.FINETUNING,
            model_name="llama-7b"
        ) as run:
            # Log parameters
            mlflow_manager.log_params({
                "learning_rate": 2e-5,
                "batch_size": 8
            })

            # Train model...

            # Log metrics
            mlflow_manager.log_metrics({
                "accuracy": 0.95,
                "loss": 0.02
            })

            # Log model
            mlflow_manager.log_model(
                model_path="/path/to/model",
                name="finetuned-llama-7b"
            )
        ```
    """

    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        artifact_uri: Optional[str] = None,
        registry_uri: Optional[str] = None
    ):
        """
        Initialize the MLflow manager.

        Each URI falls back to an environment variable when the argument is
        not given (or is empty), and to an empty string when neither is set.

        Args:
            tracking_uri: URI for MLflow tracking server
                (fallback: ``MLFLOW_TRACKING_URI``)
            artifact_uri: URI for MLflow artifacts
                (fallback: ``MLFLOW_ARTIFACT_URI``)
            registry_uri: URI for MLflow model registry
                (fallback: ``MLFLOW_REGISTRY_URI``)
        """
        self.tracking_uri = tracking_uri or os.environ.get("MLFLOW_TRACKING_URI", "")
        self.artifact_uri = artifact_uri or os.environ.get("MLFLOW_ARTIFACT_URI", "")
        self.registry_uri = registry_uri or os.environ.get("MLFLOW_REGISTRY_URI", "")

        self._setup_mlflow()
        # Hand the client None rather than "" for unset URIs so that it
        # applies its own default resolution.
        self.client = MlflowClient(
            tracking_uri=self.tracking_uri or None,
            registry_uri=self.registry_uri or None
        )
        self.active_run = None

    def _setup_mlflow(self) -> None:
        """Apply the configured tracking and registry URIs to the global MLflow state.

        URIs that are empty are skipped, leaving MLflow's current setting untouched.
        """
        if self.tracking_uri:
            mlflow.set_tracking_uri(self.tracking_uri)
            logger.info(f"Set MLflow tracking URI to {self.tracking_uri}")

        if self.registry_uri:
            mlflow.set_registry_uri(self.registry_uri)
            logger.info(f"Set MLflow registry URI to {self.registry_uri}")

    def create_experiment(
        self,
        experiment_type: ExperimentType,
        model_name: str,
        tags: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Create a new experiment if it doesn't exist.

        The experiment is named ``"{model_name}_{experiment_type.value}"``.

        Args:
            experiment_type: Type of experiment
            model_name: Name of the model
            tags: Tags for the experiment; only applied when the experiment
                is newly created

        Returns:
            ID of the experiment
        """
        experiment_name = f"{model_name}_{experiment_type.value}"

        # Get experiment if exists, create if not
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(
                name=experiment_name,
                artifact_location=self.artifact_uri if self.artifact_uri else None,
                tags=tags
            )
            logger.info(f"Created new experiment: {experiment_name} (ID: {experiment_id})")
        else:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing experiment: {experiment_name} (ID: {experiment_id})")

        return experiment_id

    def start_run(
        self,
        experiment_type: ExperimentType,
        model_name: str,
        run_name: Optional[str] = None,
        tags: Optional[Dict[str, str]] = None,
        nested: bool = False
    ) -> mlflow.ActiveRun:
        """
        Start a new MLflow run.

        The target experiment is looked up (or created) from ``model_name``
        and ``experiment_type``. The started run is also stored on
        ``self.active_run``.

        Args:
            experiment_type: Type of experiment
            model_name: Name of the model
            run_name: Name for the run; when omitted, a name of the form
                ``"{model_name}_{experiment_type.value}_{YYYYmmdd-HHMMSS}"``
                is generated from the current local time
            tags: Tags for the run (not applied to the experiment)
            nested: Whether this is a nested run

        Returns:
            MLflow active run context
        """
        experiment_id = self.create_experiment(experiment_type, model_name)

        if not run_name:
            import datetime
            timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            run_name = f"{model_name}_{experiment_type.value}_{timestamp}"

        self.active_run = mlflow.start_run(
            experiment_id=experiment_id,
            run_name=run_name,
            tags=tags,
            nested=nested
        )

        logger.info(f"Started MLflow run: {run_name} (ID: {self.active_run.info.run_id})")
        return self.active_run

    def end_run(self) -> None:
        """End the current MLflow run, if any, and clear the cached run handle."""
        current = mlflow.active_run()
        if current:
            run_id = current.info.run_id
            mlflow.end_run()
            logger.info(f"Ended MLflow run: {run_id}")
        # Always drop the cached handle so it cannot go stale.
        self.active_run = None

    def log_params(self, params: Dict[str, Any]) -> None:
        """
        Log parameters to the current run.

        Logs a warning and does nothing when no run is active.

        Args:
            params: Dictionary of parameters to log
        """
        if not mlflow.active_run():
            logger.warning("No active run. Parameters will not be logged.")
            return

        mlflow.log_params(params)
        logger.debug(f"Logged parameters: {params}")

    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log metrics to the current run.

        Logs a warning and does nothing when no run is active.

        Args:
            metrics: Dictionary of metrics to log
            step: Step value for the metrics
        """
        if not mlflow.active_run():
            logger.warning("No active run. Metrics will not be logged.")
            return

        mlflow.log_metrics(metrics, step=step)
        logger.debug(f"Logged metrics: {metrics}")

    def log_model(
        self,
        model_path: str,
        name: str,
        flavor: str = "pyfunc",
        **kwargs
    ) -> str:
        """
        Log a model to MLflow.

        The logging function is taken from the flavor submodule
        (``mlflow.<flavor>.log_model``). If the flavor cannot be resolved,
        a warning is logged and ``mlflow.pyfunc.log_model`` is used instead.

        Args:
            model_path: Path to the model
            name: Name for the logged model; the artifact path becomes
                ``"models/{name}"``
            flavor: MLflow model flavor
            **kwargs: Additional arguments for model logging

        Returns:
            Artifact path where the model is logged, or an empty string
            when no run is active
        """
        if not mlflow.active_run():
            logger.warning("No active run. Model will not be logged.")
            return ""

        # Flavors live in submodules that each expose log_model. A default of
        # None is used on both lookups so an unknown flavor reaches the
        # fallback below instead of raising AttributeError.
        try:
            log_func = getattr(getattr(mlflow, flavor, None), "log_model", None)
        except ImportError:
            # The flavor module exists but could not be imported.
            log_func = None

        if not callable(log_func):
            logger.warning(f"Unsupported model flavor: {flavor}. Using pyfunc instead.")
            log_func = mlflow.pyfunc.log_model

        artifact_path = f"models/{name}"
        # NOTE(review): the `path=` keyword is forwarded unchanged; confirm that
        # the selected flavor's log_model accepts it, or pass the
        # flavor-specific argument through **kwargs.
        log_func(
            artifact_path=artifact_path,
            path=model_path,
            **kwargs
        )

        logger.info(f"Logged model: {name} at {artifact_path}")
        return artifact_path

    def log_artifact(self, local_path: str, artifact_path: Optional[str] = None) -> None:
        """
        Log an artifact to MLflow.

        Logs a warning and does nothing when no run is active.

        Args:
            local_path: Local path to the artifact
            artifact_path: Path for the artifact in MLflow
        """
        if not mlflow.active_run():
            logger.warning("No active run. Artifact will not be logged.")
            return

        mlflow.log_artifact(local_path, artifact_path)
        logger.debug(f"Logged artifact: {local_path} to {artifact_path or 'root'}")

    def log_artifacts(self, local_dir: str, artifact_path: Optional[str] = None) -> None:
        """
        Log multiple artifacts to MLflow.

        Logs a warning and does nothing when no run is active.

        Args:
            local_dir: Local directory containing artifacts
            artifact_path: Path for the artifacts in MLflow
        """
        if not mlflow.active_run():
            logger.warning("No active run. Artifacts will not be logged.")
            return

        mlflow.log_artifacts(local_dir, artifact_path)
        logger.debug(f"Logged artifacts from directory: {local_dir} to {artifact_path or 'root'}")

    def get_run(self, run_id: str) -> Optional[mlflow.entities.Run]:
        """
        Get a run by ID.

        Args:
            run_id: ID of the run

        Returns:
            MLflow run entity, or None if the lookup raised an
            ``MlflowException`` (the error is logged)
        """
        try:
            return self.client.get_run(run_id)
        except mlflow.exceptions.MlflowException as e:
            logger.error(f"Failed to get run {run_id}: {e}")
            return None

    def search_runs(
        self,
        experiment_ids: List[str],
        filter_string: Optional[str] = None,
        max_results: int = 100
    ) -> List[mlflow.entities.Run]:
        """
        Search for runs in the given experiments.

        Args:
            experiment_ids: List of experiment IDs
            filter_string: Filter string for the search; None means no filter
            max_results: Maximum number of results to return

        Returns:
            List of MLflow run entities, or an empty list if the search
            raised an ``MlflowException`` (the error is logged)
        """
        try:
            return self.client.search_runs(
                experiment_ids=experiment_ids,
                # The client expects a string; translate None to "no filter".
                filter_string=filter_string or "",
                max_results=max_results
            )
        except mlflow.exceptions.MlflowException as e:
            logger.error(f"Failed to search runs: {e}")
            return []

    def get_experiment_id_by_name(self, experiment_name: str) -> Optional[str]:
        """
        Get experiment ID by name.

        Args:
            experiment_name: Name of the experiment

        Returns:
            Experiment ID or None if not found
        """
        experiment = mlflow.get_experiment_by_name(experiment_name)
        return experiment.experiment_id if experiment else None

    def set_tracking_tag(self, key: str, value: str) -> None:
        """
        Set a tag for the current run.

        Logs a warning and does nothing when no run is active.

        Args:
            key: Tag key
            value: Tag value
        """
        if not mlflow.active_run():
            logger.warning("No active run. Tag will not be set.")
            return

        mlflow.set_tag(key, value)
        logger.debug(f"Set tag: {key}={value}")

    def create_model_version(
        self,
        name: str,
        source: str,
        description: Optional[str] = None,
        tags: Optional[Dict[str, str]] = None
    ) -> Optional[str]:
        """
        Create a new model version in the registry.

        Args:
            name: Name of the registered model
            source: Source path of the model
            description: Description for the model version
            tags: Tags for the model version

        Returns:
            Version of the created model, or None if creation raised an
            ``MlflowException`` (the error is logged)
        """
        try:
            version = self.client.create_model_version(
                name=name,
                source=source,
                description=description,
                tags=tags
            )
        except mlflow.exceptions.MlflowException as e:
            logger.error(f"Failed to create model version: {e}")
            return None

        logger.info(f"Created model version: {name} v{version.version}")
        return version.version
|