isa-model 0.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. isa_model/__init__.py +5 -0
  2. isa_model/core/model_manager.py +143 -0
  3. isa_model/core/model_registry.py +115 -0
  4. isa_model/core/model_router.py +226 -0
  5. isa_model/core/model_storage.py +133 -0
  6. isa_model/core/model_version.py +0 -0
  7. isa_model/core/resource_manager.py +202 -0
  8. isa_model/core/storage/hf_storage.py +0 -0
  9. isa_model/core/storage/local_storage.py +0 -0
  10. isa_model/core/storage/minio_storage.py +0 -0
  11. isa_model/deployment/mlflow_gateway/__init__.py +8 -0
  12. isa_model/deployment/mlflow_gateway/start_gateway.py +65 -0
  13. isa_model/deployment/unified_multimodal_client.py +341 -0
  14. isa_model/inference/__init__.py +11 -0
  15. isa_model/inference/adapter/triton_adapter.py +453 -0
  16. isa_model/inference/adapter/unified_api.py +248 -0
  17. isa_model/inference/ai_factory.py +354 -0
  18. isa_model/inference/backends/Pytorch/bge_embed_backend.py +188 -0
  19. isa_model/inference/backends/Pytorch/gemma_backend.py +167 -0
  20. isa_model/inference/backends/Pytorch/llama_backend.py +166 -0
  21. isa_model/inference/backends/Pytorch/whisper_backend.py +194 -0
  22. isa_model/inference/backends/__init__.py +53 -0
  23. isa_model/inference/backends/base_backend_client.py +26 -0
  24. isa_model/inference/backends/container_services.py +104 -0
  25. isa_model/inference/backends/local_services.py +72 -0
  26. isa_model/inference/backends/openai_client.py +130 -0
  27. isa_model/inference/backends/replicate_client.py +197 -0
  28. isa_model/inference/backends/third_party_services.py +239 -0
  29. isa_model/inference/backends/triton_client.py +97 -0
  30. isa_model/inference/base.py +46 -0
  31. isa_model/inference/client_sdk/__init__.py +0 -0
  32. isa_model/inference/client_sdk/client.py +134 -0
  33. isa_model/inference/client_sdk/client_data_std.py +34 -0
  34. isa_model/inference/client_sdk/client_sdk_schema.py +16 -0
  35. isa_model/inference/client_sdk/exceptions.py +0 -0
  36. isa_model/inference/engine/triton/model_repository/bge/1/model.py +174 -0
  37. isa_model/inference/engine/triton/model_repository/gemma/1/model.py +250 -0
  38. isa_model/inference/engine/triton/model_repository/llama/1/model.py +76 -0
  39. isa_model/inference/engine/triton/model_repository/whisper/1/model.py +195 -0
  40. isa_model/inference/providers/__init__.py +19 -0
  41. isa_model/inference/providers/base_provider.py +30 -0
  42. isa_model/inference/providers/model_cache_manager.py +341 -0
  43. isa_model/inference/providers/ollama_provider.py +73 -0
  44. isa_model/inference/providers/openai_provider.py +87 -0
  45. isa_model/inference/providers/replicate_provider.py +94 -0
  46. isa_model/inference/providers/triton_provider.py +439 -0
  47. isa_model/inference/providers/vllm_provider.py +0 -0
  48. isa_model/inference/providers/yyds_provider.py +83 -0
  49. isa_model/inference/services/__init__.py +14 -0
  50. isa_model/inference/services/audio/fish_speech/handler.py +215 -0
  51. isa_model/inference/services/audio/runpod_tts_fish_service.py +212 -0
  52. isa_model/inference/services/audio/triton_speech_service.py +138 -0
  53. isa_model/inference/services/audio/whisper_service.py +186 -0
  54. isa_model/inference/services/audio/yyds_audio_service.py +71 -0
  55. isa_model/inference/services/base_service.py +106 -0
  56. isa_model/inference/services/base_tts_service.py +66 -0
  57. isa_model/inference/services/embedding/bge_service.py +183 -0
  58. isa_model/inference/services/embedding/ollama_embed_service.py +85 -0
  59. isa_model/inference/services/embedding/ollama_rerank_service.py +118 -0
  60. isa_model/inference/services/embedding/onnx_rerank_service.py +73 -0
  61. isa_model/inference/services/llm/__init__.py +16 -0
  62. isa_model/inference/services/llm/gemma_service.py +143 -0
  63. isa_model/inference/services/llm/llama_service.py +143 -0
  64. isa_model/inference/services/llm/ollama_llm_service.py +108 -0
  65. isa_model/inference/services/llm/openai_llm_service.py +129 -0
  66. isa_model/inference/services/llm/replicate_llm_service.py +179 -0
  67. isa_model/inference/services/llm/triton_llm_service.py +230 -0
  68. isa_model/inference/services/others/table_transformer_service.py +61 -0
  69. isa_model/inference/services/vision/__init__.py +12 -0
  70. isa_model/inference/services/vision/helpers/image_utils.py +58 -0
  71. isa_model/inference/services/vision/helpers/text_splitter.py +46 -0
  72. isa_model/inference/services/vision/ollama_vision_service.py +60 -0
  73. isa_model/inference/services/vision/replicate_vision_service.py +241 -0
  74. isa_model/inference/services/vision/triton_vision_service.py +199 -0
  75. isa_model/inference/services/vision/yyds_vision_service.py +80 -0
  76. isa_model/inference/utils/conversion/bge_rerank_convert.py +73 -0
  77. isa_model/inference/utils/conversion/onnx_converter.py +0 -0
  78. isa_model/inference/utils/conversion/torch_converter.py +0 -0
  79. isa_model/scripts/inference_tracker.py +283 -0
  80. isa_model/scripts/mlflow_manager.py +379 -0
  81. isa_model/scripts/model_registry.py +465 -0
  82. isa_model/scripts/start_mlflow.py +95 -0
  83. isa_model/scripts/training_tracker.py +257 -0
  84. isa_model/training/engine/llama_factory/__init__.py +39 -0
  85. isa_model/training/engine/llama_factory/config.py +115 -0
  86. isa_model/training/engine/llama_factory/data_adapter.py +284 -0
  87. isa_model/training/engine/llama_factory/examples/__init__.py +6 -0
  88. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +185 -0
  89. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +163 -0
  90. isa_model/training/engine/llama_factory/factory.py +331 -0
  91. isa_model/training/engine/llama_factory/rl.py +254 -0
  92. isa_model/training/engine/llama_factory/trainer.py +171 -0
  93. isa_model/training/image_model/configs/create_config.py +37 -0
  94. isa_model/training/image_model/configs/create_flux_config.py +26 -0
  95. isa_model/training/image_model/configs/create_lora_config.py +21 -0
  96. isa_model/training/image_model/prepare_massed_compute.py +97 -0
  97. isa_model/training/image_model/prepare_upload.py +17 -0
  98. isa_model/training/image_model/raw_data/create_captions.py +16 -0
  99. isa_model/training/image_model/raw_data/create_lora_captions.py +20 -0
  100. isa_model/training/image_model/raw_data/pre_processing.py +200 -0
  101. isa_model/training/image_model/train/train.py +42 -0
  102. isa_model/training/image_model/train/train_flux.py +41 -0
  103. isa_model/training/image_model/train/train_lora.py +57 -0
  104. isa_model/training/image_model/train_main.py +25 -0
  105. isa_model/training/llm_model/annotation/annotation_schema.py +47 -0
  106. isa_model/training/llm_model/annotation/processors/annotation_processor.py +126 -0
  107. isa_model/training/llm_model/annotation/storage/dataset_manager.py +131 -0
  108. isa_model/training/llm_model/annotation/storage/dataset_schema.py +44 -0
  109. isa_model/training/llm_model/annotation/tests/test_annotation_flow.py +109 -0
  110. isa_model/training/llm_model/annotation/tests/test_minio copy.py +113 -0
  111. isa_model/training/llm_model/annotation/tests/test_minio_upload.py +43 -0
  112. isa_model/training/llm_model/annotation/views/annotation_controller.py +158 -0
  113. isa_model-0.1.0.dist-info/METADATA +116 -0
  114. isa_model-0.1.0.dist-info/RECORD +117 -0
  115. isa_model-0.1.0.dist-info/WHEEL +5 -0
  116. isa_model-0.1.0.dist-info/licenses/LICENSE +21 -0
  117. isa_model-0.1.0.dist-info/top_level.txt +1 -0
isa_model/inference/utils/conversion/bge_rerank_convert.py
@@ -0,0 +1,73 @@
+ import os
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from pathlib import Path
+
+ def convert_bge_to_onnx(save_dir: str):
+     """Convert BGE reranker to ONNX format"""
+     try:
+         # Create save directory if it doesn't exist
+         save_dir = Path(save_dir).resolve()  # Get absolute path
+         save_dir.mkdir(parents=True, exist_ok=True)
+
+         model_name = "BAAI/bge-reranker-v2-m3"
+         save_path = str(save_dir / "model.onnx")  # Convert to string for absolute path
+
+         print(f"Loading model {model_name}...")
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForSequenceClassification.from_pretrained(model_name)
+         model.eval()
+
+         # Save tokenizer for later use
+         print("Saving tokenizer...")
+         tokenizer.save_pretrained(save_dir)
+
+         # Create dummy input
+         print("Creating dummy input...")
+         dummy_input = tokenizer(
+             [["what is panda?", "The giant panda is a bear species."]],
+             padding=True,
+             truncation=True,
+             return_tensors='pt',
+             max_length=512
+         )
+
+         # Export to ONNX with external data storage
+         print(f"Exporting to ONNX: {save_path}")
+         torch.onnx.export(
+             model,
+             (dummy_input['input_ids'], dummy_input['attention_mask']),
+             save_path,  # Using string absolute path
+             input_names=['input_ids', 'attention_mask'],
+             output_names=['logits'],
+             dynamic_axes={
+                 'input_ids': {0: 'batch', 1: 'sequence'},
+                 'attention_mask': {0: 'batch', 1: 'sequence'},
+                 'logits': {0: 'batch'}
+             },
+             opset_version=16,
+             export_params=True,  # Export the trained parameter weights
+             do_constant_folding=True,  # Optimize constant-folding
+             verbose=True,
+             use_external_data_format=True  # Enable external data storage
+         )
+         print("Conversion completed successfully!")
+         return True
+
+     except Exception as e:
+         print(f"Error during conversion: {e}")
+         return False
+
+ if __name__ == "__main__":
+     # Get the absolute path to the model directory
+     current_dir = Path(__file__).parent.parent
+     model_dir = current_dir / "model_converted" / "bge-reranker-v2-m3"
+
+     success = convert_bge_to_onnx(str(model_dir))
+     if success:
+         print(f"Model saved to: {model_dir}")
+         print("Files created:")
+         for file in model_dir.glob('*'):
+             print(f"- {file.name}")
+     else:
+         print("Conversion failed!")
File without changes
isa_model/scripts/inference_tracker.py
@@ -0,0 +1,283 @@
+ """
+ MLflow tracker for inference workflows.
+ """
+
+ import os
+ import json
+ import time
+ import logging
+ from typing import Dict, List, Optional, Any, Union
+ from contextlib import contextmanager
+
+ from .mlflow_manager import MLflowManager, ExperimentType
+ from .model_registry import ModelRegistry, ModelStage, ModelVersion
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class InferenceTracker:
+     """
+     Tracker for model inference workflows.
+
+     This class provides utilities to track model inference using MLflow,
+     including performance metrics and input/output logging.
+
+     Example:
+         ```python
+         # Initialize tracker
+         tracker = InferenceTracker(
+             tracking_uri="http://localhost:5000"
+         )
+
+         # Get model from registry
+         model_version = tracker.get_production_model("llama-7b")
+
+         # Track inference
+         with tracker.track_inference(
+             model_name="llama-7b",
+             model_version=model_version.version
+         ):
+             # Start timer
+             start_time = time.time()
+
+             # Generate text
+             output = model.generate(prompt)
+
+             # Log inference
+             tracker.log_inference(
+                 input=prompt,
+                 output=output,
+                 latency_ms=(time.time() - start_time) * 1000
+             )
+         ```
+     """
+
+     def __init__(
+         self,
+         tracking_uri: Optional[str] = None,
+         artifact_uri: Optional[str] = None,
+         registry_uri: Optional[str] = None
+     ):
+         """
+         Initialize the inference tracker.
+
+         Args:
+             tracking_uri: URI for MLflow tracking server
+             artifact_uri: URI for MLflow artifacts
+             registry_uri: URI for MLflow model registry
+         """
+         self.mlflow_manager = MLflowManager(
+             tracking_uri=tracking_uri,
+             artifact_uri=artifact_uri,
+             registry_uri=registry_uri
+         )
+         self.model_registry = ModelRegistry(
+             tracking_uri=tracking_uri,
+             registry_uri=registry_uri
+         )
+         self.current_run_info = {}
+         self.inference_samples = []
+
+     def get_production_model(self, model_name: str) -> Optional[ModelVersion]:
+         """
+         Get the production version of a model.
+
+         Args:
+             model_name: Name of the model
+
+         Returns:
+             Production ModelVersion or None if not found
+         """
+         return self.model_registry.get_latest_model_version(
+             name=model_name,
+             stage=ModelStage.PRODUCTION
+         )
+
+     def get_staging_model(self, model_name: str) -> Optional[ModelVersion]:
+         """
+         Get the staging version of a model.
+
+         Args:
+             model_name: Name of the model
+
+         Returns:
+             Staging ModelVersion or None if not found
+         """
+         return self.model_registry.get_latest_model_version(
+             name=model_name,
+             stage=ModelStage.STAGING
+         )
+
+     @contextmanager
+     def track_inference(
+         self,
+         model_name: str,
+         model_version: Optional[str] = None,
+         batch_size: Optional[int] = None,
+         tags: Optional[Dict[str, str]] = None
+     ):
+         """
+         Track model inference with MLflow.
+
+         Args:
+             model_name: Name of the model
+             model_version: Version of the model
+             batch_size: Batch size for inference
+             tags: Tags for the run
+
+         Yields:
+             Dictionary with run information
+         """
+         run_info = {
+             "model_name": model_name,
+             "model_version": model_version,
+             "batch_size": batch_size,
+             "start_time": time.time(),
+             "metrics": {}
+         }
+
+         # Prepare tags
+         if tags is None:
+             tags = {}
+
+         tags["model_name"] = model_name
+         if model_version:
+             tags["model_version"] = model_version
+
+         if batch_size:
+             tags["batch_size"] = str(batch_size)
+
+         # Start the MLflow run
+         with self.mlflow_manager.start_run(
+             experiment_type=ExperimentType.INFERENCE,
+             model_name=model_name,
+             tags=tags
+         ) as run:
+             run_info["run_id"] = run.info.run_id
+             run_info["experiment_id"] = run.info.experiment_id
+
+             # Reset inference samples
+             self.inference_samples = []
+
+             self.current_run_info = run_info
+             try:
+                 yield run_info
+
+                 # Calculate and log summary metrics
+                 self._log_summary_metrics()
+
+                 # Save inference samples
+                 if self.inference_samples:
+                     self._save_inference_samples()
+
+             finally:
+                 run_info["end_time"] = time.time()
+                 run_info["duration"] = run_info["end_time"] - run_info["start_time"]
+
+                 # Log duration
+                 self.mlflow_manager.log_metrics({
+                     "duration_seconds": run_info["duration"]
+                 })
+
+                 self.current_run_info = {}
+
+     def log_inference(
+         self,
+         input: str,
+         output: str,
+         latency_ms: Optional[float] = None,
+         token_count: Optional[int] = None,
+         tokens_per_second: Optional[float] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Log an inference sample.
+
+         Args:
+             input: Input prompt
+             output: Generated output
+             latency_ms: Latency in milliseconds
+             token_count: Number of tokens generated
+             tokens_per_second: Tokens per second
+             metadata: Additional metadata
+         """
+         if not self.current_run_info:
+             logger.warning("No active run. Inference will not be logged.")
+             return
+
+         sample = {
+             "input": input,
+             "output": output,
+             "timestamp": time.time()
+         }
+
+         if latency_ms is not None:
+             sample["latency_ms"] = latency_ms
+
+         if token_count is not None:
+             sample["token_count"] = token_count
+
+         if tokens_per_second is not None:
+             sample["tokens_per_second"] = tokens_per_second
+
+         if metadata:
+             sample["metadata"] = metadata
+
+         self.inference_samples.append(sample)
+
+         # Log individual metrics
+         metrics = {}
+         if latency_ms is not None:
+             metrics["latency_ms"] = latency_ms
+
+         if token_count is not None:
+             metrics["token_count"] = token_count
+
+         if tokens_per_second is not None:
+             metrics["tokens_per_second"] = tokens_per_second
+
+         if metrics:
+             self.mlflow_manager.log_metrics(metrics)
+
+     def _log_summary_metrics(self) -> None:
+         """Log summary metrics based on all inference samples."""
+         if not self.inference_samples:
+             return
+
+         latencies = [s.get("latency_ms") for s in self.inference_samples if "latency_ms" in s]
+         token_counts = [s.get("token_count") for s in self.inference_samples if "token_count" in s]
+         tokens_per_second = [s.get("tokens_per_second") for s in self.inference_samples if "tokens_per_second" in s]
+
+         metrics = {
+             "inference_count": len(self.inference_samples)
+         }
+
+         if latencies:
+             metrics["avg_latency_ms"] = sum(latencies) / len(latencies)
+             metrics["min_latency_ms"] = min(latencies)
+             metrics["max_latency_ms"] = max(latencies)
+
+         if token_counts:
+             metrics["avg_token_count"] = sum(token_counts) / len(token_counts)
+             metrics["total_tokens"] = sum(token_counts)
+
+         if tokens_per_second:
+             metrics["avg_tokens_per_second"] = sum(tokens_per_second) / len(tokens_per_second)
+
+         self.mlflow_manager.log_metrics(metrics)
+
+     def _save_inference_samples(self) -> None:
+         """Save inference samples as an artifact."""
+         import tempfile
+
+         with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
+             json.dump(self.inference_samples, f, indent=2)
+             temp_path = f.name
+
+         self.mlflow_manager.log_artifact(temp_path, "inference_samples.json")
+
+         try:
+             os.remove(temp_path)
+         except:
+             pass
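For context (not part of the diff): the per-sample values passed to log_inference are what _log_summary_metrics aggregates into avg/min/max figures. A minimal sketch, assuming a hypothetical generate_fn and count_tokens helper, of how latency_ms, token_count and tokens_per_second would typically be derived:

```python
# Hypothetical usage sketch; generate_fn and count_tokens are assumed helpers,
# not part of isa_model. Shows how per-sample metrics are derived and logged.
import time
from isa_model.scripts.inference_tracker import InferenceTracker

tracker = InferenceTracker(tracking_uri="http://localhost:5000")

with tracker.track_inference(model_name="llama-7b", batch_size=1):
    for prompt in ["What is a panda?", "Summarize MLflow in one sentence."]:
        start = time.time()
        output = generate_fn(prompt)              # assumed model call
        latency_ms = (time.time() - start) * 1000

        token_count = count_tokens(output)        # assumed tokenizer helper
        tokens_per_second = token_count / (latency_ms / 1000) if latency_ms else None

        tracker.log_inference(
            input=prompt,
            output=output,
            latency_ms=latency_ms,
            token_count=token_count,
            tokens_per_second=tokens_per_second,
        )
# On exit, the context manager logs duration_seconds plus the summary metrics
# and stores the collected samples as inference_samples.json.
```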