isa-model 0.2.0-py3-none-any.whl → 0.3.1-py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/model_manager.py +69 -4
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +427 -236
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +149 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +158 -86
- isa_model/inference/services/llm/llm_adapter.py +414 -0
- isa_model/inference/services/llm/ollama_llm_service.py +252 -63
- isa_model/inference/services/llm/openai_llm_service.py +231 -93
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +151 -17
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.3.1.dist-info/METADATA +465 -0
- isa_model-0.3.1.dist-info/RECORD +91 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.2.0.dist-info/METADATA +0 -327
- isa_model-0.2.0.dist-info/RECORD +0 -92
- isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
isa_model/deployment/core/deployment_manager.py
@@ -0,0 +1,549 @@
+"""
+Deployment Manager
+
+Orchestrates the complete deployment workflow including model preparation,
+container building, deployment to cloud providers, and monitoring.
+"""
+
+import os
+import json
+import logging
+from typing import Dict, List, Optional, Any
+from pathlib import Path
+from datetime import datetime
+import asyncio
+
+from .deployment_config import (
+    DeploymentConfig, DeploymentProvider, InferenceEngine,
+    ModelConfig, TritonConfig, RunPodServerlessConfig
+)
+from ...core.model_manager import ModelManager
+from ...core.model_registry import ModelRegistry, ModelType, ModelCapability
+from ...core.storage.hf_storage import HuggingFaceStorage
+
+logger = logging.getLogger(__name__)
+
+
+class DeploymentManager:
+    """
+    Manages the complete deployment lifecycle for AI models.
+
+    This manager coordinates:
+    - Model preparation and optimization
+    - Container building and configuration
+    - Deployment to cloud providers
+    - Health monitoring and scaling
+    - Integration with model registry
+
+    Example:
+        ```python
+        from isa_model.deployment import DeploymentManager
+        from isa_model.deployment.core import create_gemma_runpod_triton_config
+
+        # Initialize deployment manager
+        manager = DeploymentManager()
+
+        # Create deployment configuration
+        config = create_gemma_runpod_triton_config(
+            model_id="gemma-v1",
+            runpod_api_key="your-api-key",
+            model_source_path="xenobordom/gemma-4b-alpaca-v1"
+        )
+
+        # Deploy the model
+        deployment = await manager.deploy_model(config)
+        print(f"Model deployed: {deployment['endpoint_url']}")
+        ```
+    """
+
+    def __init__(self,
+                 model_manager: Optional[ModelManager] = None,
+                 storage_backend: str = "huggingface",
+                 workspace_dir: str = "./deployments"):
+        """
+        Initialize deployment manager.
+
+        Args:
+            model_manager: Model manager instance
+            storage_backend: Storage backend to use ("huggingface", "local")
+            workspace_dir: Directory for deployment artifacts
+        """
+        self.workspace_dir = Path(workspace_dir)
+        self.workspace_dir.mkdir(parents=True, exist_ok=True)
+
+        # Initialize model management
+        if storage_backend == "huggingface":
+            storage = HuggingFaceStorage()
+        else:
+            from ...core.model_storage import LocalModelStorage
+            storage = LocalModelStorage()
+
+        self.model_manager = model_manager or ModelManager(storage=storage)
+        self.model_registry = ModelRegistry()
+
+        # Deployment tracking
+        self.deployments: Dict[str, Dict[str, Any]] = {}
+        self.deployments_file = self.workspace_dir / "deployments.json"
+        self._load_deployments()
+
+        # Setup logging
+        self._setup_logging()
+
+        logger.info(f"Deployment manager initialized with {storage_backend} storage")
+        logger.info(f"Workspace directory: {self.workspace_dir}")
+
+    def _setup_logging(self):
+        """Setup deployment logging"""
+        log_dir = self.workspace_dir / "logs"
+        log_dir.mkdir(exist_ok=True)
+
+        # Create deployment-specific logger
+        deployment_logger = logging.getLogger("deployment")
+        deployment_logger.setLevel(logging.DEBUG)
+
+        # File handler for deployment logs
+        file_handler = logging.FileHandler(log_dir / "deployments.log")
+        file_handler.setLevel(logging.DEBUG)
+        file_handler.setFormatter(logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
+        ))
+
+        deployment_logger.addHandler(file_handler)
+
+    def _load_deployments(self):
+        """Load deployment tracking data"""
+        if self.deployments_file.exists():
+            with open(self.deployments_file, 'r') as f:
+                self.deployments = json.load(f)
+        else:
+            self.deployments = {}
+            self._save_deployments()
+
+    def _save_deployments(self):
+        """Save deployment tracking data"""
+        with open(self.deployments_file, 'w') as f:
+            json.dump(self.deployments, f, indent=2, default=str)
+
+    async def deploy_model(self, config: DeploymentConfig) -> Dict[str, Any]:
+        """
+        Deploy a model using the specified configuration.
+
+        Args:
+            config: Deployment configuration
+
+        Returns:
+            Deployment result with endpoint information
+        """
+        deployment_id = config.deployment_id
+
+        logger.info("=" * 60)
+        logger.info(f"STARTING DEPLOYMENT: {deployment_id}")
+        logger.info("=" * 60)
+
+        try:
+            # Step 1: Validate configuration
+            logger.info("Step 1/6: Validating deployment configuration...")
+            self._validate_config(config)
+
+            # Step 2: Prepare model
+            logger.info("Step 2/6: Preparing model...")
+            model_path = await self._prepare_model(config.model_config)
+
+            # Step 3: Optimize model (TensorRT conversion if needed)
+            logger.info("Step 3/6: Optimizing model...")
+            optimized_model_path = await self._optimize_model(config, model_path)
+
+            # Step 4: Prepare deployment artifacts
+            logger.info("Step 4/6: Preparing deployment artifacts...")
+            artifacts_path = await self._prepare_deployment_artifacts(config, optimized_model_path)
+
+            # Step 5: Deploy to provider
+            logger.info("Step 5/6: Deploying to provider...")
+            deployment_result = await self._deploy_to_provider(config, artifacts_path)
+
+            # Step 6: Register deployment
+            logger.info("Step 6/6: Registering deployment...")
+            await self._register_deployment(config, deployment_result)
+
+            logger.info("=" * 60)
+            logger.info("DEPLOYMENT COMPLETED SUCCESSFULLY!")
+            logger.info("=" * 60)
+            logger.info(f"Deployment ID: {deployment_id}")
+            logger.info(f"Endpoint URL: {deployment_result.get('endpoint_url', 'N/A')}")
+
+            return deployment_result
+
+        except Exception as e:
+            logger.error("=" * 60)
+            logger.error("DEPLOYMENT FAILED!")
+            logger.error("=" * 60)
+            logger.error(f"Error: {e}")
+
+            # Update deployment status
+            self.deployments[deployment_id] = {
+                "config": config.to_dict(),
+                "status": "failed",
+                "error": str(e),
+                "created_at": datetime.now().isoformat(),
+                "updated_at": datetime.now().isoformat()
+            }
+            self._save_deployments()
+
+            raise
+
+    def _validate_config(self, config: DeploymentConfig):
+        """Validate deployment configuration"""
+        logger.debug("Validating deployment configuration...")
+
+        # Check required fields
+        if not config.deployment_id:
+            raise ValueError("deployment_id is required")
+
+        if not config.model_config:
+            raise ValueError("model_config is required")
+
+        # Provider-specific validation
+        if config.provider == DeploymentProvider.RUNPOD_SERVERLESS:
+            if not config.runpod_config or not config.runpod_config.api_key:
+                raise ValueError("RunPod API key is required for RunPod deployment")
+
+        # Engine-specific validation
+        if config.inference_engine == InferenceEngine.TRITON:
+            if not config.triton_config:
+                raise ValueError("Triton configuration is required for Triton engine")
+
+        logger.info("Configuration validation passed")
+
+    async def _prepare_model(self, model_config: ModelConfig) -> Path:
+        """Prepare model for deployment"""
+        logger.info(f"Preparing model: {model_config.model_id}")
+
+        # Determine model type for registry
+        if model_config.model_type == "llm":
+            model_type = ModelType.LLM
+        elif model_config.model_type == "embedding":
+            model_type = ModelType.EMBEDDING
+        elif model_config.model_type == "vision":
+            model_type = ModelType.VISION
+        else:
+            model_type = ModelType.LLM  # Default
+
+        # Convert capabilities
+        capabilities = []
+        for cap in model_config.capabilities:
+            if cap == "text_generation":
+                capabilities.append(ModelCapability.TEXT_GENERATION)
+            elif cap == "chat":
+                capabilities.append(ModelCapability.CHAT)
+            elif cap == "embedding":
+                capabilities.append(ModelCapability.EMBEDDING)
+            else:
+                capabilities.append(ModelCapability.TEXT_GENERATION)  # Default
+
+        # Get or download model
+        if model_config.source_type == "huggingface":
+            model_path = await self.model_manager.get_model(
+                model_id=model_config.model_id,
+                repo_id=model_config.source_path,
+                model_type=model_type,
+                capabilities=capabilities
+            )
+        elif model_config.source_type == "local":
+            model_path = Path(model_config.source_path)
+            if not model_path.exists():
+                raise FileNotFoundError(f"Model not found at {model_path}")
+        else:
+            raise ValueError(f"Unsupported source type: {model_config.source_type}")
+
+        logger.info(f"Model prepared at: {model_path}")
+        return model_path
+
+    async def _optimize_model(self, config: DeploymentConfig, model_path: Path) -> Path:
+        """Optimize model for deployment"""
+        logger.info("Optimizing model for deployment...")
+
+        # For now, return the original path
+        # TODO: Implement TensorRT optimization, quantization, etc.
+        if config.model_config.use_tensorrt:
+            logger.info("TensorRT optimization requested (not yet implemented)")
+
+        if config.model_config.use_quantization:
+            logger.info(f"Quantization requested: {config.model_config.quantization_method}")
+
+        logger.info("Model optimization completed (pass-through for now)")
+        return model_path
+
+    async def _prepare_deployment_artifacts(self, config: DeploymentConfig, model_path: Path) -> Path:
+        """Prepare deployment artifacts"""
+        logger.info("Preparing deployment artifacts...")
+
+        # Create deployment workspace
+        deployment_workspace = self.workspace_dir / config.deployment_id
+        deployment_workspace.mkdir(exist_ok=True)
+
+        artifacts = {
+            "config": config.to_dict(),
+            "model_path": str(model_path),
+            "created_at": datetime.now().isoformat()
+        }
+
+        # Save deployment artifacts
+        with open(deployment_workspace / "deployment_config.json", 'w') as f:
+            json.dump(artifacts, f, indent=2)
+
+        # Generate Triton model configuration if needed
+        if config.inference_engine == InferenceEngine.TRITON:
+            await self._generate_triton_config(config, deployment_workspace, model_path)
+
+        # Generate Docker configuration if needed
+        await self._generate_docker_config(config, deployment_workspace)
+
+        logger.info(f"Deployment artifacts prepared at: {deployment_workspace}")
+        return deployment_workspace
+
+    async def _generate_triton_config(self, config: DeploymentConfig, workspace: Path, model_path: Path):
+        """Generate Triton model configuration"""
+        logger.info("Generating Triton model configuration...")
+
+        triton_config = config.triton_config
+        model_config = config.model_config
+
+        # Create model repository structure
+        model_repo = workspace / "model_repository"
+        model_dir = model_repo / triton_config.model_name / "1"
+        model_dir.mkdir(parents=True, exist_ok=True)
+
+        # Copy model files
+        import shutil
+        if model_path.is_file():
+            shutil.copy2(model_path, model_dir)
+        else:
+            shutil.copytree(model_path, model_dir / "model", dirs_exist_ok=True)
+
+        # Generate config.pbtxt
+        config_content = f"""
+name: "{triton_config.model_name}"
+backend: "{triton_config.backend}"
+max_batch_size: {triton_config.max_batch_size}
+
+input [
+  {{
+    name: "input_ids"
+    data_type: TYPE_INT32
+    dims: [ -1 ]
+  }},
+  {{
+    name: "attention_mask"
+    data_type: TYPE_INT32
+    dims: [ -1 ]
+    optional: true
+  }}
+]
+
+output [
+  {{
+    name: "output"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }}
+]
+
+instance_group [
+  {{
+    count: {triton_config.instance_group_count}
+    kind: {triton_config.instance_group_kind}
+  }}
+]
+
+dynamic_batching {{
+  max_queue_delay_microseconds: 100
+}}
+"""
+
+        with open(model_repo / triton_config.model_name / "config.pbtxt", 'w') as f:
+            f.write(config_content.strip())
+
+        logger.info("Triton configuration generated")
+
+    async def _generate_docker_config(self, config: DeploymentConfig, workspace: Path):
+        """Generate Docker configuration"""
+        logger.info("Generating Docker configuration...")
+
+        # Generate Dockerfile
+        dockerfile_content = f"""
+FROM {config.runpod_config.container_image if config.runpod_config else 'nvidia/tritonserver:23.10-py3'}
+
+WORKDIR /workspace
+
+# Copy model repository
+COPY model_repository /models
+
+# Copy deployment configuration
+COPY deployment_config.json /workspace/
+
+# Set environment variables
+ENV TRITON_MODEL_REPOSITORY=/models
+ENV CUDA_VISIBLE_DEVICES=0
+
+# Expose Triton ports
+EXPOSE 8000 8001 8002
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \\
+    CMD curl -f http://localhost:8000/v2/health/ready || exit 1
+
+# Start Triton server
+CMD ["tritonserver", "--model-repository=/models", "--allow-http=true", "--allow-grpc=true", "--allow-metrics=true"]
+"""
+
+        with open(workspace / "Dockerfile", 'w') as f:
+            f.write(dockerfile_content.strip())
+
+        # Generate docker-compose.yml for local testing
+        compose_content = f"""
+version: '3.8'
+
+services:
+  triton-server:
+    build: .
+    ports:
+      - "8000:8000"
+      - "8001:8001"
+      - "8002:8002"
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+    volumes:
+      - ./model_repository:/models
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+"""
+
+        with open(workspace / "docker-compose.yml", 'w') as f:
+            f.write(compose_content.strip())
+
+        logger.info("Docker configuration generated")
+
+    async def _deploy_to_provider(self, config: DeploymentConfig, artifacts_path: Path) -> Dict[str, Any]:
+        """Deploy to the specified provider"""
+        logger.info(f"Deploying to provider: {config.provider.value}")
+
+        if config.provider == DeploymentProvider.RUNPOD_SERVERLESS:
+            return await self._deploy_to_runpod_serverless(config, artifacts_path)
+        elif config.provider == DeploymentProvider.LOCAL:
+            return await self._deploy_locally(config, artifacts_path)
+        else:
+            raise ValueError(f"Provider {config.provider} not yet implemented")
+
+    async def _deploy_to_runpod_serverless(self, config: DeploymentConfig, artifacts_path: Path) -> Dict[str, Any]:
+        """Deploy to RunPod Serverless"""
+        logger.info("Deploying to RunPod Serverless...")
+
+        # TODO: Implement RunPod Serverless deployment
+        # This would involve:
+        # 1. Building and pushing Docker image
+        # 2. Creating RunPod serverless endpoint
+        # 3. Configuring scaling and networking
+
+        # For now, return mock result
+        result = {
+            "provider": "runpod_serverless",
+            "endpoint_id": f"mock-endpoint-{config.deployment_id}",
+            "endpoint_url": f"https://api.runpod.ai/v2/{config.deployment_id}/run",
+            "status": "deployed",
+            "deployed_at": datetime.now().isoformat()
+        }
+
+        logger.info(f"RunPod deployment completed: {result['endpoint_url']}")
+        return result
+
+    async def _deploy_locally(self, config: DeploymentConfig, artifacts_path: Path) -> Dict[str, Any]:
+        """Deploy locally using Docker"""
+        logger.info("Deploying locally using Docker...")
+
+        # TODO: Implement local Docker deployment
+        result = {
+            "provider": "local",
+            "endpoint_url": "http://localhost:8000",
+            "status": "deployed",
+            "deployed_at": datetime.now().isoformat(),
+            "container_id": f"triton-{config.deployment_id}"
+        }
+
+        logger.info(f"Local deployment completed: {result['endpoint_url']}")
+        return result
+
+    async def _register_deployment(self, config: DeploymentConfig, deployment_result: Dict[str, Any]):
+        """Register deployment in tracking system"""
+        logger.info("Registering deployment...")
+
+        deployment_info = {
+            "config": config.to_dict(),
+            "result": deployment_result,
+            "status": "active",
+            "created_at": datetime.now().isoformat(),
+            "updated_at": datetime.now().isoformat()
+        }
+
+        self.deployments[config.deployment_id] = deployment_info
+        self._save_deployments()
+
+        logger.info(f"Deployment registered: {config.deployment_id}")
+
+    async def list_deployments(self) -> List[Dict[str, Any]]:
+        """List all deployments"""
+        return [
+            {
+                "deployment_id": deployment_id,
+                **info
+            }
+            for deployment_id, info in self.deployments.items()
+        ]
+
+    async def get_deployment(self, deployment_id: str) -> Optional[Dict[str, Any]]:
+        """Get deployment information"""
+        return self.deployments.get(deployment_id)
+
+    async def delete_deployment(self, deployment_id: str) -> bool:
+        """Delete a deployment"""
+        logger.info(f"Deleting deployment: {deployment_id}")
+
+        try:
+            if deployment_id in self.deployments:
+                # TODO: Implement actual provider cleanup
+
+                # Remove from tracking
+                del self.deployments[deployment_id]
+                self._save_deployments()
+
+                # Clean up workspace
+                deployment_workspace = self.workspace_dir / deployment_id
+                if deployment_workspace.exists():
+                    import shutil
+                    shutil.rmtree(deployment_workspace)
+
+                logger.info(f"Deployment deleted: {deployment_id}")
+                return True
+            else:
+                logger.warning(f"Deployment not found: {deployment_id}")
+                return False
+
+        except Exception as e:
+            logger.error(f"Failed to delete deployment {deployment_id}: {e}")
+            return False
+
+    async def update_deployment_status(self, deployment_id: str, status: str, **kwargs):
+        """Update deployment status"""
+        if deployment_id in self.deployments:
+            self.deployments[deployment_id]["status"] = status
+            self.deployments[deployment_id]["updated_at"] = datetime.now().isoformat()
+
+            for key, value in kwargs.items():
+                self.deployments[deployment_id][key] = value
+
+            self._save_deployments()
+            logger.info(f"Updated deployment {deployment_id} status to {status}")