isa-model 0.4.0-py3-none-any.whl → 0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,512 @@
+ """
+ Triton deployment provider
+
+ Handles deployment of models to Triton Inference Server with TensorRT-LLM optimization.
+ """
+
+ import os
+ import json
+ import logging
+ import subprocess
+ import tempfile
+ from typing import Dict, List, Optional, Any
+ from pathlib import Path
+ from datetime import datetime
+ import asyncio
+ import docker
+
+ from .config import TritonConfig, TritonServiceType, TritonBackend
+
+ logger = logging.getLogger(__name__)
+
+
+ class TritonProvider:
+     """
+     Provider for deploying models to Triton Inference Server with TensorRT-LLM.
+
+     This provider handles:
+     - Model conversion to TensorRT engines
+     - Triton model configuration generation
+     - Docker container deployment
+     - Health monitoring and scaling
+     """
+
+     def __init__(self, workspace_dir: str = "./triton_deployments"):
+         """
+         Initialize Triton provider.
+
+         Args:
+             workspace_dir: Directory for deployment artifacts
+         """
+         self.workspace_dir = Path(workspace_dir)
+         self.workspace_dir.mkdir(parents=True, exist_ok=True)
+
+         # Initialize Docker client
+         try:
+             self.docker_client = docker.from_env()
+         except Exception as e:
+             logger.warning(f"Docker client initialization failed: {e}")
+             self.docker_client = None
+
+         # Deployment tracking
+         self.deployments: Dict[str, Dict[str, Any]] = {}
+
+         logger.info("Triton provider initialized")
+         logger.info(f"Workspace directory: {self.workspace_dir}")
+
+     async def deploy(self, config: TritonConfig) -> Dict[str, Any]:
+         """
+         Deploy a model to Triton Inference Server.
+
+         Args:
+             config: Triton deployment configuration
+
+         Returns:
+             Deployment result with endpoint information
+         """
+         deployment_id = f"{config.service_name}-triton-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+
+         logger.info("=" * 60)
+         logger.info(f"STARTING TRITON DEPLOYMENT: {deployment_id}")
+         logger.info("=" * 60)
+
+         try:
+             # Step 1: Prepare workspace
+             logger.info("Step 1/6: Preparing deployment workspace...")
+             workspace = await self._prepare_workspace(deployment_id, config)
+
+             # Step 2: Download HF model
+             logger.info("Step 2/6: Downloading HuggingFace model...")
+             hf_model_path = await self._download_hf_model(config, workspace)
+
+             # Step 3: Convert to TensorRT engine (if needed)
+             if config.use_tensorrt and config.service_type == TritonServiceType.LLM:
+                 logger.info("Step 3/6: Converting model to TensorRT engine...")
+                 engine_path = await self._build_tensorrt_engine(config, workspace, hf_model_path)
+             else:
+                 logger.info("Step 3/6: Skipping TensorRT conversion...")
+                 engine_path = hf_model_path
+
+             # Step 4: Generate Triton model configuration
+             logger.info("Step 4/6: Generating Triton model configuration...")
+             await self._generate_triton_config(config, workspace, engine_path)
+
+             # Step 5: Deploy container
+             logger.info("Step 5/6: Deploying Triton container...")
+             container_info = await self._deploy_container(config, workspace)
+
+             # Step 6: Verify deployment
+             logger.info("Step 6/6: Verifying deployment...")
+             endpoint_url = await self._verify_deployment(config, container_info)
+
+             result = {
+                 "provider": "triton",
+                 "deployment_id": deployment_id,
+                 "service_name": config.service_name,
+                 "service_type": config.service_type.value,
+                 "endpoint_url": endpoint_url,
+                 "container_id": container_info.get("container_id"),
+                 "status": "deployed",
+                 "deployed_at": datetime.now().isoformat()
+             }
+
+             # Register deployment
+             self.deployments[deployment_id] = {
+                 "config": config.to_dict(),
+                 "result": result,
+                 "workspace": str(workspace)
+             }
+
+             logger.info("=" * 60)
+             logger.info("TRITON DEPLOYMENT COMPLETED SUCCESSFULLY!")
+             logger.info("=" * 60)
+             logger.info(f"Deployment ID: {deployment_id}")
+             logger.info(f"Endpoint URL: {endpoint_url}")
+
+             return result
+
+         except Exception as e:
+             logger.error("=" * 60)
+             logger.error("TRITON DEPLOYMENT FAILED!")
+             logger.error("=" * 60)
+             logger.error(f"Error: {e}")
+             raise
+
+     async def _prepare_workspace(self, deployment_id: str, config: TritonConfig) -> Path:
+         """Prepare deployment workspace"""
+         workspace = self.workspace_dir / deployment_id
+         workspace.mkdir(exist_ok=True)
+
+         # Create required directories
+         (workspace / "hf_model").mkdir(exist_ok=True)
+         (workspace / "engines").mkdir(exist_ok=True)
+         (workspace / "model_repository" / config.model_name / config.model_version).mkdir(parents=True, exist_ok=True)
+
+         # Save deployment config
+         with open(workspace / "deployment_config.json", 'w') as f:
+             json.dump(config.to_dict(), f, indent=2)
+
+         logger.info(f"Workspace prepared at: {workspace}")
+         return workspace
+
+     async def _download_hf_model(self, config: TritonConfig, workspace: Path) -> Path:
+         """Download HuggingFace model"""
+         hf_model_path = workspace / "hf_model"
+
+         # Use huggingface_hub to download the model snapshot
+         try:
+             from huggingface_hub import snapshot_download
+
+             logger.info(f"Downloading model: {config.model_id}")
+             snapshot_download(
+                 repo_id=config.model_id,
+                 local_dir=str(hf_model_path),
+                 local_dir_use_symlinks=False
+             )
+
+             logger.info(f"Model downloaded to: {hf_model_path}")
+             return hf_model_path
+
+         except Exception as e:
+             logger.error(f"Failed to download model: {e}")
+             raise
+
+     async def _build_tensorrt_engine(self, config: TritonConfig, workspace: Path, hf_model_path: Path) -> Path:
+         """Build TensorRT engine using Docker"""
+         engine_output_path = workspace / "engines"
+
+         logger.info("Building TensorRT engine using Docker...")
+
+         # Prepare build command
+         build_options = config.build_options
+         build_cmd_parts = [
+             "trtllm-build",
+             "--checkpoint_dir /workspace/hf_model",
+             "--output_dir /workspace/engines",
+         ]
+
+         # Add build options (boolean options are emitted as bare flags)
+         for key, value in build_options.items():
+             if isinstance(value, bool):
+                 if value:
+                     build_cmd_parts.append(f"--{key}")
+             else:
+                 build_cmd_parts.append(f"--{key} {value}")
+
+         build_cmd = " && ".join([
+             "set -e",
+             "echo '>>> Building TensorRT engine...'",
+             " ".join(build_cmd_parts),
+             "echo '>>> TensorRT engine build completed!'"
+         ])
+
+         # Run the build in a GPU-enabled Docker container
+         if self.docker_client:
+             try:
+                 logger.info("Starting TensorRT build container...")
+
+                 # With detach=False this call blocks until the build finishes
+                 # and returns the container logs.
+                 build_logs = self.docker_client.containers.run(
+                     config.build_container_image,
+                     command=f"bash -c \"{build_cmd}\"",
+                     volumes={
+                         str(hf_model_path): {"bind": "/workspace/hf_model", "mode": "ro"},
+                         str(engine_output_path): {"bind": "/workspace/engines", "mode": "rw"}
+                     },
+                     device_requests=[
+                         docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])
+                     ],
+                     remove=True,
+                     detach=False
+                 )
+
+                 logger.info("TensorRT engine build completed")
+
+             except Exception as e:
+                 logger.error(f"TensorRT build failed: {e}")
+                 raise
+         else:
+             # A subprocess fallback would require the docker CLI; not implemented
+             logger.warning("Docker client unavailable; cannot run TensorRT build")
+             raise RuntimeError("Docker client required for TensorRT build")
+
+         return engine_output_path
+
+     async def _generate_triton_config(self, config: TritonConfig, workspace: Path, model_path: Path):
+         """Generate Triton model configuration"""
+         import shutil
+
+         model_repo_path = workspace / "model_repository" / config.model_name
+
+         # Generate config.pbtxt
+         if config.backend == TritonBackend.TENSORRT_LLM:
+             config_content = self._generate_tensorrt_llm_config(config)
+         elif config.backend == TritonBackend.PYTHON:
+             config_content = self._generate_python_backend_config(config)
+         else:
+             raise ValueError(f"Unsupported backend: {config.backend}")
+
+         # Write config file
+         with open(model_repo_path / "config.pbtxt", 'w') as f:
+             f.write(config_content)
+
+         # Copy model files to model repository
+         model_version_path = model_repo_path / config.model_version
+         if config.use_tensorrt:
+             # Copy engine files
+             if (model_path / "model.engine").exists():
+                 shutil.copy2(model_path / "model.engine", model_version_path)
+             else:
+                 # Copy all engine files
+                 for engine_file in model_path.glob("*.engine"):
+                     shutil.copy2(engine_file, model_version_path)
+         else:
+             # Copy HF model files
+             shutil.copytree(model_path, model_version_path / "model", dirs_exist_ok=True)
+
+         logger.info(f"Triton configuration generated at: {model_repo_path}")
+
+     def _generate_tensorrt_llm_config(self, config: TritonConfig) -> str:
+         """Generate TensorRT-LLM backend configuration"""
+         return f'''name: "{config.model_name}"
+ backend: "tensorrtllm"
+ max_batch_size: {config.max_batch_size}
+
+ {"decoupled: true" if config.enable_streaming else ""}
+
+ input [
+   {{
+     name: "text_input"
+     data_type: TYPE_STRING
+     dims: [ -1 ]
+   }},
+   {{
+     name: "max_new_tokens"
+     data_type: TYPE_UINT32
+     dims: [ 1 ]
+     optional: true
+   }},
+   {{
+     name: "stream"
+     data_type: TYPE_BOOL
+     dims: [ 1 ]
+     optional: true
+   }},
+   {{
+     name: "temperature"
+     data_type: TYPE_FP32
+     dims: [ 1 ]
+     optional: true
+   }},
+   {{
+     name: "top_p"
+     data_type: TYPE_FP32
+     dims: [ 1 ]
+     optional: true
+   }}
+ ]
+
+ output [
+   {{
+     name: "text_output"
+     data_type: TYPE_STRING
+     dims: [ -1 ]
+   }}
+ ]
+
+ instance_group [
+   {{
+     count: {config.instance_group_count}
+     kind: {config.instance_group_kind}
+   }}
+ ]
+
+ parameters {{
+   key: "model_type"
+   value: {{ string_value: "{"inflight_batching_llm" if config.use_inflight_batching else "llm"}" }}
+ }}
+
+ parameters {{
+   key: "max_tokens_in_paged_kv_cache"
+   value: {{ string_value: "{config.max_sequence_length * config.max_batch_size}" }}
+ }}'''
+
+     def _generate_python_backend_config(self, config: TritonConfig) -> str:
+         """Generate Python backend configuration"""
+         return f'''name: "{config.model_name}"
+ backend: "python"
+ max_batch_size: {config.max_batch_size}
+
+ input [
+   {{
+     name: "input"
+     data_type: TYPE_STRING
+     dims: [ -1 ]
+   }}
+ ]
+
+ output [
+   {{
+     name: "output"
+     data_type: TYPE_STRING
+     dims: [ -1 ]
+   }}
+ ]
+
+ instance_group [
+   {{
+     count: {config.instance_group_count}
+     kind: {config.instance_group_kind}
+   }}
+ ]'''
+
+     async def _deploy_container(self, config: TritonConfig, workspace: Path) -> Dict[str, Any]:
+         """Deploy Triton container"""
+         if not self.docker_client:
+             raise RuntimeError("Docker client required for container deployment")
+
+         # Generate docker-compose.yml
+         await self._generate_docker_compose(config, workspace)
+
+         # Deploy using docker-compose
+         compose_file = workspace / "docker-compose.yml"
+
+         try:
+             # Run docker-compose up
+             cmd = f"cd {workspace} && docker-compose up -d"
+             result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+
+             if result.returncode != 0:
+                 raise RuntimeError(f"Docker compose failed: {result.stderr}")
+
+             logger.info("Triton container deployed successfully")
+
+             return {
+                 "container_id": f"triton-{config.service_name}",
+                 "compose_file": str(compose_file)
+             }
+
+         except Exception as e:
+             logger.error(f"Container deployment failed: {e}")
+             raise
+
+     async def _generate_docker_compose(self, config: TritonConfig, workspace: Path):
+         """Generate docker-compose.yml for Triton deployment"""
+         compose_content = f'''version: '3.8'
+
+ services:
+   triton-{config.service_name}:
+     image: {config.container_image}
+     ports:
+       - "{config.http_port}:{config.http_port}"
+       - "{config.grpc_port}:{config.grpc_port}"
+       - "{config.metrics_port}:{config.metrics_port}"
+     volumes:
+       - ./model_repository:/models
+     environment:
+       - CUDA_VISIBLE_DEVICES=0
+ {self._format_env_vars(config.environment)}
+     command: >
+       tritonserver
+       --model-repository=/models
+       --allow-http=true
+       --allow-grpc=true
+       --allow-metrics=true
+       --http-port={config.http_port}
+       --grpc-port={config.grpc_port}
+       --metrics-port={config.metrics_port}
+       --log-verbose=1
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               count: {config.gpu_count}
+               capabilities: [gpu]
+     healthcheck:
+       test: ["CMD", "curl", "-f", "http://localhost:{config.http_port}/v2/health/ready"]
+       interval: 30s
+       timeout: 10s
+       retries: 3
+       start_period: 60s
+ '''
+
+         with open(workspace / "docker-compose.yml", 'w') as f:
+             f.write(compose_content)
+
+         logger.info("Docker compose configuration generated")
+
+     def _format_env_vars(self, env_vars: Dict[str, str]) -> str:
+         """Format environment variables for docker-compose"""
+         if not env_vars:
+             return ""
+
+         # Six leading spaces align each entry under the service's environment key
+         formatted = []
+         for key, value in env_vars.items():
+             formatted.append(f"      - {key}={value}")
+
+         return "\n" + "\n".join(formatted)
+
+     async def _verify_deployment(self, config: TritonConfig, container_info: Dict[str, Any]) -> str:
+         """Verify deployment is healthy"""
+         import requests
+
+         endpoint_url = f"http://localhost:{config.http_port}"
+         health_url = f"{endpoint_url}/v2/health/ready"
+
+         # Wait for service to be ready
+         max_retries = 30
+         for i in range(max_retries):
+             try:
+                 response = requests.get(health_url, timeout=5)
+                 if response.status_code == 200:
+                     logger.info("Triton service is healthy and ready")
+                     return endpoint_url
+             except Exception:
+                 pass
+
+             if i < max_retries - 1:
+                 logger.info(f"Waiting for Triton service... ({i+1}/{max_retries})")
+                 # Non-blocking sleep so the event loop stays responsive
+                 await asyncio.sleep(10)
+
+         raise RuntimeError("Triton service failed to become ready")
+
+     async def list_deployments(self) -> List[Dict[str, Any]]:
+         """List all Triton deployments"""
+         return [
+             {
+                 "deployment_id": deployment_id,
+                 **info
+             }
+             for deployment_id, info in self.deployments.items()
+         ]
+
+     async def delete_deployment(self, deployment_id: str) -> bool:
+         """Delete a Triton deployment"""
+         import shutil
+
+         if deployment_id not in self.deployments:
+             return False
+
+         try:
+             deployment_info = self.deployments[deployment_id]
+             workspace = Path(deployment_info["workspace"])
+
+             # Stop docker-compose services
+             if (workspace / "docker-compose.yml").exists():
+                 cmd = f"cd {workspace} && docker-compose down"
+                 subprocess.run(cmd, shell=True, capture_output=True)
+
+             # Clean up workspace
+             if workspace.exists():
+                 shutil.rmtree(workspace)
+
+             # Remove from tracking
+             del self.deployments[deployment_id]
+
+             logger.info(f"Triton deployment deleted: {deployment_id}")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to delete Triton deployment {deployment_id}: {e}")
+             return False
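
For orientation, a minimal usage sketch of the new provider follows. It assumes TritonConfig accepts the fields that provider.py reads above (service_name, service_type, model_id, backend, use_tensorrt); the actual constructor signature lives in isa_model/deployment/triton/config.py (+196 lines in this release) and is not shown in this diff, so the field values here are hypothetical.

import asyncio

from isa_model.deployment.triton.config import TritonConfig, TritonServiceType, TritonBackend
from isa_model.deployment.triton.provider import TritonProvider

async def main():
    # Hypothetical values; required arguments and defaults may differ.
    config = TritonConfig(
        service_name="llama-demo",
        service_type=TritonServiceType.LLM,
        model_id="meta-llama/Llama-2-7b-hf",  # HF repo downloaded in Step 2/6
        backend=TritonBackend.TENSORRT_LLM,
        use_tensorrt=True,                    # triggers trtllm-build in Step 3/6
    )
    provider = TritonProvider(workspace_dir="./triton_deployments")
    result = await provider.deploy(config)
    print(result["deployment_id"], result["endpoint_url"])

asyncio.run(main())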
@@ -0,0 +1 @@
+ """Triton deployment scripts"""
@@ -0,0 +1 @@
+ """Triton deployment templates"""
@@ -8,4 +8,50 @@ This module provides the main inference components for the IsA Model system.
  from .ai_factory import AIFactory
  from .base import ModelType, Capability, RoutingStrategy

- __all__ = ["AIFactory", "ModelType", "Capability", "RoutingStrategy"]
+ # Import legacy model services (migrated from isA_MCP)
+ try:
+     from .legacy_services import (
+         ModelTrainingService,
+         TrainingConfig,
+         TrainingResult,
+         ModelEvaluationService,
+         EvaluationResult,
+         ModelServingService,
+         ServingResult,
+         ModelService,
+         ModelConfig,
+         ModelResult
+     )
+     LEGACY_SERVICES_AVAILABLE = True
+ except ImportError:
+     LEGACY_SERVICES_AVAILABLE = False
+     ModelTrainingService = None
+     TrainingConfig = None
+     TrainingResult = None
+     ModelEvaluationService = None
+     EvaluationResult = None
+     ModelServingService = None
+     ServingResult = None
+     ModelService = None
+     ModelConfig = None
+     ModelResult = None
+
+ __all__ = [
+     "AIFactory",
+     "ModelType",
+     "Capability",
+     "RoutingStrategy",
+
+     # Legacy model services (migrated from isA_MCP)
+     "ModelTrainingService",
+     "TrainingConfig",
+     "TrainingResult",
+     "ModelEvaluationService",
+     "EvaluationResult",
+     "ModelServingService",
+     "ServingResult",
+     "ModelService",
+     "ModelConfig",
+     "ModelResult",
+     "LEGACY_SERVICES_AVAILABLE"
+ ]
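
Because every legacy name is rebound to None when the import fails, callers should gate on the new LEGACY_SERVICES_AVAILABLE flag before using them. A minimal sketch of that pattern follows; the constructor arguments of ModelTrainingService are not part of this diff, so the no-argument call is an assumption.

from isa_model.inference import LEGACY_SERVICES_AVAILABLE, ModelTrainingService

if not LEGACY_SERVICES_AVAILABLE:
    raise RuntimeError("isa_model legacy services are not importable in this install")

training = ModelTrainingService()  # hypothetical no-arg construction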