isa-model 0.2.0-py3-none-any.whl → 0.2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +142 -240
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/services/audio/openai_tts_service.py +104 -3
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +111 -1
- isa_model/inference/services/llm/ollama_llm_service.py +234 -26
- isa_model/inference/services/llm/openai_llm_service.py +225 -28
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/ollama_vision_service.py +143 -17
- isa_model/inference/services/vision/replicate_image_gen_service.py +139 -7
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.2.8.dist-info/METADATA +465 -0
- isa_model-0.2.8.dist-info/RECORD +86 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.2.0.dist-info/METADATA +0 -327
- isa_model-0.2.0.dist-info/RECORD +0 -92
- isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.2.8.dist-info}/WHEEL +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.2.8.dist-info}/top_level.txt +0 -0
isa_model/deployment/core/deployment_config.py (new file)
@@ -0,0 +1,356 @@
+"""
+Deployment Configuration Classes
+
+Defines configuration classes for different deployment scenarios including
+RunPod serverless, Triton inference server, and TensorRT-LLM backend.
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional, Dict, Any, List
+from enum import Enum
+from pathlib import Path
+
+
+class DeploymentProvider(str, Enum):
+    """Deployment providers"""
+    RUNPOD_SERVERLESS = "runpod_serverless"
+    RUNPOD_PODS = "runpod_pods"
+    AWS_LAMBDA = "aws_lambda"
+    GOOGLE_CLOUD_RUN = "google_cloud_run"
+    AZURE_CONTAINER_INSTANCES = "azure_container_instances"
+    LOCAL = "local"
+
+
+class InferenceEngine(str, Enum):
+    """Inference engines"""
+    TRITON = "triton"
+    VLLM = "vllm"
+    TENSORRT_LLM = "tensorrt_llm"
+    HUGGINGFACE = "huggingface"
+    ONNX = "onnx"
+    TORCHSCRIPT = "torchscript"
+
+
+class ModelFormat(str, Enum):
+    """Model formats for deployment"""
+    HUGGINGFACE = "huggingface"
+    TENSORRT = "tensorrt"
+    ONNX = "onnx"
+    TORCHSCRIPT = "torchscript"
+    SAFETENSORS = "safetensors"
+
+
+@dataclass
+class TritonConfig:
+    """Configuration for Triton Inference Server"""
+
+    # Model repository configuration
+    model_repository: str = "/models"
+    model_name: str = "model"
+    model_version: str = "1"
+
+    # Backend configuration
+    backend: str = "tensorrtllm"  # tensorrtllm, python, onnxruntime
+    max_batch_size: int = 8
+    max_sequence_length: int = 2048
+
+    # TensorRT-LLM specific
+    tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
+    engine_dir: str = "/models/engines"
+    tokenizer_dir: str = "/models/tokenizer"
+
+    # Performance settings
+    instance_group_count: int = 1
+    instance_group_kind: str = "KIND_GPU"  # KIND_GPU, KIND_CPU
+
+    # Memory settings
+    optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
+    enable_pinned_input: bool = True
+    enable_pinned_output: bool = True
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class RunPodServerlessConfig:
+    """Configuration for RunPod Serverless deployment"""
+
+    # RunPod settings
+    api_key: str
+    endpoint_id: Optional[str] = None
+    template_id: Optional[str] = None
+
+    # Container configuration
+    container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
+    container_disk_in_gb: int = 20
+
+    # GPU configuration
+    gpu_type: str = "NVIDIA RTX A6000"
+    gpu_count: int = 1
+
+    # Scaling configuration
+    min_workers: int = 0
+    max_workers: int = 3
+    idle_timeout: int = 5  # seconds
+
+    # Network configuration
+    network_volume_id: Optional[str] = None
+
+    # Environment variables
+    env_vars: Dict[str, str] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class ModelConfig:
+    """Configuration for model deployment"""
+
+    # Model identification
+    model_id: str
+    model_name: str
+    model_version: str = "1.0.0"
+
+    # Model source
+    source_type: str = "huggingface"  # huggingface, local, s3, gcs
+    source_path: str = ""
+
+    # Model format and engine
+    model_format: ModelFormat = ModelFormat.HUGGINGFACE
+    inference_engine: InferenceEngine = InferenceEngine.TRITON
+
+    # Model metadata
+    model_type: str = "llm"  # llm, embedding, vision, audio
+    capabilities: List[str] = field(default_factory=lambda: ["text_generation"])
+
+    # Performance configuration
+    max_batch_size: int = 8
+    max_sequence_length: int = 2048
+    dtype: str = "float16"  # float32, float16, int8, int4
+
+    # Optimization settings
+    use_tensorrt: bool = True
+    use_quantization: bool = False
+    quantization_method: str = "int8"  # int8, int4, awq, gptq
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary"""
+        return self.__dict__.copy()
+
+
+@dataclass
+class DeploymentConfig:
+    """Main deployment configuration"""
+
+    # Deployment identification
+    deployment_id: str
+    deployment_name: str
+    description: Optional[str] = None
+
+    # Provider and engine configuration
+    provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
+    inference_engine: InferenceEngine = InferenceEngine.TRITON
+
+    # Model configuration
+    model_config: ModelConfig = None
+
+    # Provider-specific configurations
+    runpod_config: Optional[RunPodServerlessConfig] = None
+    triton_config: Optional[TritonConfig] = None
+
+    # Health check configuration
+    health_check_path: str = "/health"
+    health_check_timeout: int = 30
+
+    # Monitoring configuration
+    enable_logging: bool = True
+    log_level: str = "INFO"
+    enable_metrics: bool = True
+
+    # Networking
+    custom_domain: Optional[str] = None
+    allowed_origins: List[str] = field(default_factory=lambda: ["*"])
+
+    # Additional settings
+    extra_config: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        """Validate configuration after initialization"""
+        if not self.deployment_id:
+            raise ValueError("deployment_id is required")
+
+        if not self.deployment_name:
+            raise ValueError("deployment_name is required")
+
+        if not self.model_config:
+            raise ValueError("model_config is required")
+
+        # Set default provider configs if not provided
+        if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
+            self.runpod_config = RunPodServerlessConfig(api_key="")
+
+        if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
+            self.triton_config = TritonConfig()
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert config to dictionary"""
+        config_dict = {}
+
+        for key, value in self.__dict__.items():
+            if key in ['model_config', 'runpod_config', 'triton_config']:
+                if value is not None:
+                    config_dict[key] = value.to_dict()
+                else:
+                    config_dict[key] = None
+            elif isinstance(value, Enum):
+                config_dict[key] = value.value
+            else:
+                config_dict[key] = value
+
+        return config_dict
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
+        """Create config from dictionary"""
+        # Handle nested configs
+        if 'model_config' in config_dict and config_dict['model_config'] is not None:
+            config_dict['model_config'] = ModelConfig(**config_dict['model_config'])
+
+        if 'runpod_config' in config_dict and config_dict['runpod_config'] is not None:
+            config_dict['runpod_config'] = RunPodServerlessConfig(**config_dict['runpod_config'])
+
+        if 'triton_config' in config_dict and config_dict['triton_config'] is not None:
+            config_dict['triton_config'] = TritonConfig(**config_dict['triton_config'])
+
+        # Handle enums
+        if 'provider' in config_dict:
+            config_dict['provider'] = DeploymentProvider(config_dict['provider'])
+
+        if 'inference_engine' in config_dict:
+            config_dict['inference_engine'] = InferenceEngine(config_dict['inference_engine'])
+
+        return cls(**config_dict)
+
+
+# Predefined configurations for common deployment scenarios
+
+def create_gemma_runpod_triton_config(
+    model_id: str,
+    runpod_api_key: str,
+    model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
+) -> DeploymentConfig:
+    """
+    Create a deployment configuration for Gemma model on RunPod with Triton + TensorRT-LLM.
+
+    Args:
+        model_id: Unique identifier for the deployment
+        runpod_api_key: RunPod API key
+        model_source_path: HuggingFace model path or local path
+
+    Returns:
+        DeploymentConfig for Gemma deployment
+    """
+    model_config = ModelConfig(
+        model_id=model_id,
+        model_name="gemma-4b-alpaca",
+        source_type="huggingface",
+        source_path=model_source_path,
+        model_format=ModelFormat.HUGGINGFACE,
+        inference_engine=InferenceEngine.TRITON,
+        model_type="llm",
+        capabilities=["text_generation", "chat"],
+        max_batch_size=8,
+        max_sequence_length=2048,
+        dtype="float16",
+        use_tensorrt=True
+    )
+
+    runpod_config = RunPodServerlessConfig(
+        api_key=runpod_api_key,
+        container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
+        container_disk_in_gb=30,
+        gpu_type="NVIDIA RTX A6000",
+        gpu_count=1,
+        min_workers=0,
+        max_workers=3,
+        idle_timeout=5,
+        env_vars={
+            "TRITON_MODEL_REPOSITORY": "/models",
+            "CUDA_VISIBLE_DEVICES": "0"
+        }
+    )
+
+    triton_config = TritonConfig(
+        model_repository="/models",
+        model_name="gemma-4b-alpaca",
+        backend="tensorrtllm",
+        max_batch_size=8,
+        max_sequence_length=2048,
+        tensorrt_llm_model_dir="/models/tensorrt_llm",
+        engine_dir="/models/engines",
+        tokenizer_dir="/models/tokenizer"
+    )
+
+    return DeploymentConfig(
+        deployment_id=f"gemma-deployment-{model_id}",
+        deployment_name=f"Gemma 4B Alpaca - {model_id}",
+        description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
+        provider=DeploymentProvider.RUNPOD_SERVERLESS,
+        inference_engine=InferenceEngine.TRITON,
+        model_config=model_config,
+        runpod_config=runpod_config,
+        triton_config=triton_config
+    )
+
+
+def create_local_triton_config(
+    model_id: str,
+    model_source_path: str,
+    triton_model_repository: str = "./models/triton"
+) -> DeploymentConfig:
+    """
+    Create a deployment configuration for local Triton deployment.
+
+    Args:
+        model_id: Unique identifier for the deployment
+        model_source_path: Path to the model
+        triton_model_repository: Path to Triton model repository
+
+    Returns:
+        DeploymentConfig for local deployment
+    """
+    model_config = ModelConfig(
+        model_id=model_id,
+        model_name=f"local-model-{model_id}",
+        source_type="local",
+        source_path=model_source_path,
+        model_format=ModelFormat.HUGGINGFACE,
+        inference_engine=InferenceEngine.TRITON,
+        model_type="llm",
+        capabilities=["text_generation"],
+        max_batch_size=4,
+        max_sequence_length=1024,
+        dtype="float16"
+    )
+
+    triton_config = TritonConfig(
+        model_repository=triton_model_repository,
+        model_name=f"local-model-{model_id}",
+        backend="python",  # Use Python backend for local development
+        max_batch_size=4,
+        max_sequence_length=1024
+    )
+
+    return DeploymentConfig(
+        deployment_id=f"local-deployment-{model_id}",
+        deployment_name=f"Local Model - {model_id}",
+        description="Local model deployment for development and testing",
+        provider=DeploymentProvider.LOCAL,
+        inference_engine=InferenceEngine.TRITON,
+        model_config=model_config,
+        triton_config=triton_config
+    )
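A minimal usage sketch of the configuration API introduced by this file, assuming the module is imported directly from the path added in this release (isa_model/deployment/core/deployment_config.py); the model_id and API key values below are placeholders, not real credentials.

# Sketch: build a RunPod Serverless + Triton config and round-trip it through a dict.
from isa_model.deployment.core.deployment_config import (
    DeploymentConfig,
    create_gemma_runpod_triton_config,
)

# Factory helper defined in the new module; "demo-001" and "rp_xxx" are placeholders.
config = create_gemma_runpod_triton_config(
    model_id="demo-001",
    runpod_api_key="rp_xxx",
)

# to_dict() flattens nested dataclasses and enums to plain values;
# from_dict() rebuilds the typed objects, so configs can be persisted as dicts.
as_dict = config.to_dict()
restored = DeploymentConfig.from_dict(as_dict)
assert restored.deployment_id == config.deployment_id

Note how nested dataclasses and enums are flattened to plain values on the way out and rebuilt on the way in, which keeps the configuration easy to persist or pass between services.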