isa-model 0.2.0__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/storage/hf_storage.py +419 -0
  3. isa_model/deployment/__init__.py +52 -0
  4. isa_model/deployment/core/__init__.py +34 -0
  5. isa_model/deployment/core/deployment_config.py +356 -0
  6. isa_model/deployment/core/deployment_manager.py +549 -0
  7. isa_model/deployment/core/isa_deployment_service.py +401 -0
  8. isa_model/eval/factory.py +381 -140
  9. isa_model/inference/ai_factory.py +142 -240
  10. isa_model/inference/providers/ml_provider.py +50 -0
  11. isa_model/inference/services/audio/openai_tts_service.py +104 -3
  12. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  13. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  14. isa_model/inference/services/llm/__init__.py +2 -0
  15. isa_model/inference/services/llm/base_llm_service.py +111 -1
  16. isa_model/inference/services/llm/ollama_llm_service.py +234 -26
  17. isa_model/inference/services/llm/openai_llm_service.py +225 -28
  18. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  19. isa_model/inference/services/ml/base_ml_service.py +78 -0
  20. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  21. isa_model/inference/services/vision/__init__.py +3 -3
  22. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  23. isa_model/inference/services/vision/base_vision_service.py +177 -0
  24. isa_model/inference/services/vision/ollama_vision_service.py +143 -17
  25. isa_model/inference/services/vision/replicate_image_gen_service.py +139 -7
  26. isa_model/training/__init__.py +62 -32
  27. isa_model/training/cloud/__init__.py +22 -0
  28. isa_model/training/cloud/job_orchestrator.py +402 -0
  29. isa_model/training/cloud/runpod_trainer.py +454 -0
  30. isa_model/training/cloud/storage_manager.py +482 -0
  31. isa_model/training/core/__init__.py +23 -0
  32. isa_model/training/core/config.py +181 -0
  33. isa_model/training/core/dataset.py +222 -0
  34. isa_model/training/core/trainer.py +720 -0
  35. isa_model/training/core/utils.py +213 -0
  36. isa_model/training/factory.py +229 -198
  37. isa_model-0.2.8.dist-info/METADATA +465 -0
  38. isa_model-0.2.8.dist-info/RECORD +86 -0
  39. isa_model/core/model_router.py +0 -226
  40. isa_model/core/model_version.py +0 -0
  41. isa_model/core/resource_manager.py +0 -202
  42. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  43. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  44. isa_model/training/engine/llama_factory/__init__.py +0 -39
  45. isa_model/training/engine/llama_factory/config.py +0 -115
  46. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  47. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  48. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  49. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  50. isa_model/training/engine/llama_factory/factory.py +0 -331
  51. isa_model/training/engine/llama_factory/rl.py +0 -254
  52. isa_model/training/engine/llama_factory/trainer.py +0 -171
  53. isa_model/training/image_model/configs/create_config.py +0 -37
  54. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  55. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  56. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  57. isa_model/training/image_model/prepare_upload.py +0 -17
  58. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  59. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  60. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  61. isa_model/training/image_model/train/train.py +0 -42
  62. isa_model/training/image_model/train/train_flux.py +0 -41
  63. isa_model/training/image_model/train/train_lora.py +0 -57
  64. isa_model/training/image_model/train_main.py +0 -25
  65. isa_model-0.2.0.dist-info/METADATA +0 -327
  66. isa_model-0.2.0.dist-info/RECORD +0 -92
  67. isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
  68. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  69. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  70. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  71. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  72. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  73. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  74. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  75. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  76. {isa_model-0.2.0.dist-info → isa_model-0.2.8.dist-info}/WHEEL +0 -0
  77. {isa_model-0.2.0.dist-info → isa_model-0.2.8.dist-info}/top_level.txt +0 -0
isa_model/deployment/core/deployment_config.py
@@ -0,0 +1,356 @@
+"""
+Deployment Configuration Classes
+
+Defines configuration classes for different deployment scenarios including
+RunPod serverless, Triton inference server, and TensorRT-LLM backend.
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional, Dict, Any, List
+from enum import Enum
+from pathlib import Path
+
+
+class DeploymentProvider(str, Enum):
+    """Deployment providers"""
+    RUNPOD_SERVERLESS = "runpod_serverless"
+    RUNPOD_PODS = "runpod_pods"
+    AWS_LAMBDA = "aws_lambda"
+    GOOGLE_CLOUD_RUN = "google_cloud_run"
+    AZURE_CONTAINER_INSTANCES = "azure_container_instances"
+    LOCAL = "local"
+
+
+class InferenceEngine(str, Enum):
+    """Inference engines"""
+    TRITON = "triton"
+    VLLM = "vllm"
+    TENSORRT_LLM = "tensorrt_llm"
+    HUGGINGFACE = "huggingface"
+    ONNX = "onnx"
+    TORCHSCRIPT = "torchscript"
+
+
+class ModelFormat(str, Enum):
+    """Model formats for deployment"""
+    HUGGINGFACE = "huggingface"
+    TENSORRT = "tensorrt"
+    ONNX = "onnx"
+    TORCHSCRIPT = "torchscript"
+    SAFETENSORS = "safetensors"
+
+
+@dataclass
+class TritonConfig:
+    """Configuration for Triton Inference Server"""
+
+    # Model repository configuration
+    model_repository: str = "/models"
+    model_name: str = "model"
+    model_version: str = "1"
+
+    # Backend configuration
+    backend: str = "tensorrtllm"  # tensorrtllm, python, onnxruntime
+    max_batch_size: int = 8
+    max_sequence_length: int = 2048
+
+    # TensorRT-LLM specific
+    tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
+    engine_dir: str = "/models/engines"
+    tokenizer_dir: str = "/models/tokenizer"
+
+    # Performance settings
+    instance_group_count: int = 1
+    instance_group_kind: str = "KIND_GPU"  # KIND_GPU, KIND_CPU
+
+    # Memory settings
+    optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
+    enable_pinned_input: bool = True
+    enable_pinned_output: bool = True
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class RunPodServerlessConfig:
+    """Configuration for RunPod Serverless deployment"""
+
+    # RunPod settings
+    api_key: str
+    endpoint_id: Optional[str] = None
+    template_id: Optional[str] = None
+
+    # Container configuration
+    container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
+    container_disk_in_gb: int = 20
+
+    # GPU configuration
+    gpu_type: str = "NVIDIA RTX A6000"
+    gpu_count: int = 1
+
+    # Scaling configuration
+    min_workers: int = 0
+    max_workers: int = 3
+    idle_timeout: int = 5  # seconds
+
+    # Network configuration
+    network_volume_id: Optional[str] = None
+
+    # Environment variables
+    env_vars: Dict[str, str] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class ModelConfig:
+    """Configuration for model deployment"""
+
+    # Model identification
+    model_id: str
+    model_name: str
+    model_version: str = "1.0.0"
+
+    # Model source
+    source_type: str = "huggingface"  # huggingface, local, s3, gcs
+    source_path: str = ""
+
+    # Model format and engine
+    model_format: ModelFormat = ModelFormat.HUGGINGFACE
+    inference_engine: InferenceEngine = InferenceEngine.TRITON
+
+    # Model metadata
+    model_type: str = "llm"  # llm, embedding, vision, audio
+    capabilities: List[str] = field(default_factory=lambda: ["text_generation"])
+
+    # Performance configuration
+    max_batch_size: int = 8
+    max_sequence_length: int = 2048
+    dtype: str = "float16"  # float32, float16, int8, int4
+
+    # Optimization settings
+    use_tensorrt: bool = True
+    use_quantization: bool = False
+    quantization_method: str = "int8"  # int8, int4, awq, gptq
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class DeploymentConfig:
+    """Main deployment configuration"""
+
+    # Deployment identification
+    deployment_id: str
+    deployment_name: str
+    description: Optional[str] = None
+
+    # Provider and engine configuration
+    provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
+    inference_engine: InferenceEngine = InferenceEngine.TRITON
+
+    # Model configuration
+    model_config: ModelConfig = None
+
+    # Provider-specific configurations
+    runpod_config: Optional[RunPodServerlessConfig] = None
+    triton_config: Optional[TritonConfig] = None
+
+    # Health check configuration
+    health_check_path: str = "/health"
+    health_check_timeout: int = 30
+
+    # Monitoring configuration
+    enable_logging: bool = True
+    log_level: str = "INFO"
+    enable_metrics: bool = True
+
+    # Networking
+    custom_domain: Optional[str] = None
+    allowed_origins: List[str] = field(default_factory=lambda: ["*"])
+
+    # Additional settings
+    extra_config: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        """Validate configuration after initialization"""
+        if not self.deployment_id:
+            raise ValueError("deployment_id is required")
+
+        if not self.deployment_name:
+            raise ValueError("deployment_name is required")
+
+        if not self.model_config:
+            raise ValueError("model_config is required")
+
+        # Set default provider configs if not provided
+        if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
+            self.runpod_config = RunPodServerlessConfig(api_key="")
+
+        if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
+            self.triton_config = TritonConfig()
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert config to dictionary"""
+        config_dict = {}
+
+        for key, value in self.__dict__.items():
+            if key in ['model_config', 'runpod_config', 'triton_config']:
+                if value is not None:
+                    config_dict[key] = value.to_dict()
+                else:
+                    config_dict[key] = None
+            elif isinstance(value, Enum):
+                config_dict[key] = value.value
+            else:
+                config_dict[key] = value
+
+        return config_dict
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
+        """Create config from dictionary"""
+        # Handle nested configs
+        if 'model_config' in config_dict and config_dict['model_config'] is not None:
+            config_dict['model_config'] = ModelConfig(**config_dict['model_config'])
+
+        if 'runpod_config' in config_dict and config_dict['runpod_config'] is not None:
+            config_dict['runpod_config'] = RunPodServerlessConfig(**config_dict['runpod_config'])
+
+        if 'triton_config' in config_dict and config_dict['triton_config'] is not None:
+            config_dict['triton_config'] = TritonConfig(**config_dict['triton_config'])
+
+        # Handle enums
+        if 'provider' in config_dict:
+            config_dict['provider'] = DeploymentProvider(config_dict['provider'])
+
+        if 'inference_engine' in config_dict:
+            config_dict['inference_engine'] = InferenceEngine(config_dict['inference_engine'])
+
+        return cls(**config_dict)
+
+
+# Predefined configurations for common deployment scenarios
+
+def create_gemma_runpod_triton_config(
+    model_id: str,
+    runpod_api_key: str,
+    model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
+) -> DeploymentConfig:
+    """
+    Create a deployment configuration for Gemma model on RunPod with Triton + TensorRT-LLM.
+
+    Args:
+        model_id: Unique identifier for the deployment
+        runpod_api_key: RunPod API key
+        model_source_path: HuggingFace model path or local path
+
+    Returns:
+        DeploymentConfig for Gemma deployment
+    """
+    model_config = ModelConfig(
+        model_id=model_id,
+        model_name="gemma-4b-alpaca",
+        source_type="huggingface",
+        source_path=model_source_path,
+        model_format=ModelFormat.HUGGINGFACE,
+        inference_engine=InferenceEngine.TRITON,
+        model_type="llm",
+        capabilities=["text_generation", "chat"],
+        max_batch_size=8,
+        max_sequence_length=2048,
+        dtype="float16",
+        use_tensorrt=True
+    )
+
+    runpod_config = RunPodServerlessConfig(
+        api_key=runpod_api_key,
+        container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
+        container_disk_in_gb=30,
+        gpu_type="NVIDIA RTX A6000",
+        gpu_count=1,
+        min_workers=0,
+        max_workers=3,
+        idle_timeout=5,
+        env_vars={
+            "TRITON_MODEL_REPOSITORY": "/models",
+            "CUDA_VISIBLE_DEVICES": "0"
+        }
+    )
+
+    triton_config = TritonConfig(
+        model_repository="/models",
+        model_name="gemma-4b-alpaca",
+        backend="tensorrtllm",
+        max_batch_size=8,
+        max_sequence_length=2048,
+        tensorrt_llm_model_dir="/models/tensorrt_llm",
+        engine_dir="/models/engines",
+        tokenizer_dir="/models/tokenizer"
+    )
+
+    return DeploymentConfig(
+        deployment_id=f"gemma-deployment-{model_id}",
+        deployment_name=f"Gemma 4B Alpaca - {model_id}",
+        description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
+        provider=DeploymentProvider.RUNPOD_SERVERLESS,
+        inference_engine=InferenceEngine.TRITON,
+        model_config=model_config,
+        runpod_config=runpod_config,
+        triton_config=triton_config
+    )
+
+
+def create_local_triton_config(
+    model_id: str,
+    model_source_path: str,
+    triton_model_repository: str = "./models/triton"
+) -> DeploymentConfig:
+    """
+    Create a deployment configuration for local Triton deployment.
+
+    Args:
+        model_id: Unique identifier for the deployment
+        model_source_path: Path to the model
+        triton_model_repository: Path to Triton model repository
+
+    Returns:
+        DeploymentConfig for local deployment
+    """
+    model_config = ModelConfig(
+        model_id=model_id,
+        model_name=f"local-model-{model_id}",
+        source_type="local",
+        source_path=model_source_path,
+        model_format=ModelFormat.HUGGINGFACE,
+        inference_engine=InferenceEngine.TRITON,
+        model_type="llm",
+        capabilities=["text_generation"],
+        max_batch_size=4,
+        max_sequence_length=1024,
+        dtype="float16"
+    )
+
+    triton_config = TritonConfig(
+        model_repository=triton_model_repository,
+        model_name=f"local-model-{model_id}",
+        backend="python",  # Use Python backend for local development
+        max_batch_size=4,
+        max_sequence_length=1024
+    )
+
+    return DeploymentConfig(
+        deployment_id=f"local-deployment-{model_id}",
+        deployment_name=f"Local Model - {model_id}",
+        description="Local model deployment for development and testing",
+        provider=DeploymentProvider.LOCAL,
+        inference_engine=InferenceEngine.TRITON,
+        model_config=model_config,
+        triton_config=triton_config
+    )
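
For orientation, here is a minimal usage sketch of the helpers introduced in deployment_config.py: building a local Triton config with create_local_triton_config() and round-tripping it through to_dict()/from_dict(). The import path is assumed from the package layout listed above and the model paths are placeholders; this has not been run against the published wheel.

# Minimal usage sketch (assumed import path from the file list above; the
# checkpoint path is a placeholder, not a real model).
from isa_model.deployment.core.deployment_config import (
    DeploymentConfig,
    create_local_triton_config,
)

# Build a local development deployment using the Python-backend helper.
config = create_local_triton_config(
    model_id="demo",
    model_source_path="./models/my-llm",
    triton_model_repository="./models/triton",
)

# Round-trip through the dict form: nested configs become plain dicts and
# top-level enums (provider, inference_engine) become their string values.
as_dict = config.to_dict()
restored = DeploymentConfig.from_dict(as_dict)

print(restored.deployment_id)           # local-deployment-demo
print(restored.provider.value)          # local
print(restored.triton_config.backend)   # python

Note that to_dict() only maps top-level enum fields to strings; enum members inside the nested ModelConfig dictionary are passed through unchanged, which from_dict() accepts because the dataclass constructors take either form.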