isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/deployment/triton/provider.py
@@ -0,0 +1,512 @@
+"""
+Triton deployment provider
+
+Handles deployment of models to Triton Inference Server with TensorRT-LLM optimization.
+"""
+
+import os
+import json
+import logging
+import subprocess
+import tempfile
+from typing import Dict, List, Optional, Any
+from pathlib import Path
+from datetime import datetime
+import asyncio
+import docker
+
+from .config import TritonConfig, TritonServiceType, TritonBackend
+
+logger = logging.getLogger(__name__)
+
+
+class TritonProvider:
+    """
+    Provider for deploying models to Triton Inference Server with TensorRT-LLM.
+
+    This provider handles:
+    - Model conversion to TensorRT engines
+    - Triton model configuration generation
+    - Docker container deployment
+    - Health monitoring and scaling
+    """
+
+    def __init__(self, workspace_dir: str = "./triton_deployments"):
+        """
+        Initialize Triton provider.
+
+        Args:
+            workspace_dir: Directory for deployment artifacts
+        """
+        self.workspace_dir = Path(workspace_dir)
+        self.workspace_dir.mkdir(parents=True, exist_ok=True)
+
+        # Initialize Docker client
+        try:
+            self.docker_client = docker.from_env()
+        except Exception as e:
+            logger.warning(f"Docker client initialization failed: {e}")
+            self.docker_client = None
+
+        # Deployment tracking
+        self.deployments: Dict[str, Dict[str, Any]] = {}
+
+        logger.info("Triton provider initialized")
+        logger.info(f"Workspace directory: {self.workspace_dir}")
+
+    async def deploy(self, config: TritonConfig) -> Dict[str, Any]:
+        """
+        Deploy a model to Triton Inference Server.
+
+        Args:
+            config: Triton deployment configuration
+
+        Returns:
+            Deployment result with endpoint information
+        """
+        deployment_id = f"{config.service_name}-triton-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+
+        logger.info("=" * 60)
+        logger.info(f"STARTING TRITON DEPLOYMENT: {deployment_id}")
+        logger.info("=" * 60)
+
+        try:
+            # Step 1: Prepare workspace
+            logger.info("Step 1/6: Preparing deployment workspace...")
+            workspace = await self._prepare_workspace(deployment_id, config)
+
+            # Step 2: Download HF model
+            logger.info("Step 2/6: Downloading HuggingFace model...")
+            hf_model_path = await self._download_hf_model(config, workspace)
+
+            # Step 3: Convert to TensorRT engine (if needed)
+            if config.use_tensorrt and config.service_type == TritonServiceType.LLM:
+                logger.info("Step 3/6: Converting model to TensorRT engine...")
+                engine_path = await self._build_tensorrt_engine(config, workspace, hf_model_path)
+            else:
+                logger.info("Step 3/6: Skipping TensorRT conversion...")
+                engine_path = hf_model_path
+
+            # Step 4: Generate Triton model configuration
+            logger.info("Step 4/6: Generating Triton model configuration...")
+            await self._generate_triton_config(config, workspace, engine_path)
+
+            # Step 5: Deploy container
+            logger.info("Step 5/6: Deploying Triton container...")
+            container_info = await self._deploy_container(config, workspace)
+
+            # Step 6: Verify deployment
+            logger.info("Step 6/6: Verifying deployment...")
+            endpoint_url = await self._verify_deployment(config, container_info)
+
+            result = {
+                "provider": "triton",
+                "deployment_id": deployment_id,
+                "service_name": config.service_name,
+                "service_type": config.service_type.value,
+                "endpoint_url": endpoint_url,
+                "container_id": container_info.get("container_id"),
+                "status": "deployed",
+                "deployed_at": datetime.now().isoformat()
+            }
+
+            # Register deployment
+            self.deployments[deployment_id] = {
+                "config": config.to_dict(),
+                "result": result,
+                "workspace": str(workspace)
+            }
+
+            logger.info("=" * 60)
+            logger.info("TRITON DEPLOYMENT COMPLETED SUCCESSFULLY!")
+            logger.info("=" * 60)
+            logger.info(f"Deployment ID: {deployment_id}")
+            logger.info(f"Endpoint URL: {endpoint_url}")
+
+            return result
+
+        except Exception as e:
+            logger.error("=" * 60)
+            logger.error("TRITON DEPLOYMENT FAILED!")
+            logger.error("=" * 60)
+            logger.error(f"Error: {e}")
+            raise
+
+    async def _prepare_workspace(self, deployment_id: str, config: TritonConfig) -> Path:
+        """Prepare deployment workspace"""
+        workspace = self.workspace_dir / deployment_id
+        workspace.mkdir(exist_ok=True)
+
+        # Create required directories
+        (workspace / "hf_model").mkdir(exist_ok=True)
+        (workspace / "engines").mkdir(exist_ok=True)
+        (workspace / "model_repository" / config.model_name / config.model_version).mkdir(parents=True, exist_ok=True)
+
+        # Save deployment config
+        with open(workspace / "deployment_config.json", 'w') as f:
+            json.dump(config.to_dict(), f, indent=2)
+
+        logger.info(f"Workspace prepared at: {workspace}")
+        return workspace
+
+    async def _download_hf_model(self, config: TritonConfig, workspace: Path) -> Path:
+        """Download HuggingFace model"""
+        hf_model_path = workspace / "hf_model"
+
+        # Use git clone or huggingface_hub to download
+        try:
+            from huggingface_hub import snapshot_download
+
+            logger.info(f"Downloading model: {config.model_id}")
+            snapshot_download(
+                repo_id=config.model_id,
+                local_dir=str(hf_model_path),
+                local_dir_use_symlinks=False
+            )
+
+            logger.info(f"Model downloaded to: {hf_model_path}")
+            return hf_model_path
+
+        except Exception as e:
+            logger.error(f"Failed to download model: {e}")
+            raise
+
+    async def _build_tensorrt_engine(self, config: TritonConfig, workspace: Path, hf_model_path: Path) -> Path:
+        """Build TensorRT engine using Docker"""
+        engine_output_path = workspace / "engines"
+
+        logger.info("Building TensorRT engine using Docker...")
+
+        # Prepare build command
+        build_options = config.build_options
+        build_cmd_parts = [
+            "trtllm-build",
+            "--checkpoint_dir /workspace/hf_model",
+            "--output_dir /workspace/engines",
+        ]
+
+        # Add build options
+        for key, value in build_options.items():
+            if isinstance(value, bool):
+                if value:
+                    build_cmd_parts.append(f"--{key}")
+            else:
+                build_cmd_parts.append(f"--{key} {value}")
+
+        build_cmd = " && ".join([
+            "set -e",
+            "echo '>>> Building TensorRT engine...'",
+            " ".join(build_cmd_parts),
+            "echo '>>> TensorRT engine build completed!'"
+        ])
+
+        # Run Docker container for building
+        if self.docker_client:
+            try:
+                logger.info("Starting TensorRT build container...")
+
+                container = self.docker_client.containers.run(
+                    config.build_container_image,
+                    command=f"bash -c \"{build_cmd}\"",
+                    volumes={
+                        str(hf_model_path): {"bind": "/workspace/hf_model", "mode": "ro"},
+                        str(engine_output_path): {"bind": "/workspace/engines", "mode": "rw"}
+                    },
+                    device_requests=[
+                        docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])
+                    ],
+                    remove=True,
+                    detach=False
+                )
+
+                logger.info("TensorRT engine build completed")
+
+            except Exception as e:
+                logger.error(f"TensorRT build failed: {e}")
+                raise
+        else:
+            # Fallback to subprocess if Docker client unavailable
+            logger.warning("Docker client unavailable, using subprocess...")
+            # Implementation would depend on having docker command available
+            raise RuntimeError("Docker client required for TensorRT build")
+
+        return engine_output_path
+
+    async def _generate_triton_config(self, config: TritonConfig, workspace: Path, model_path: Path):
+        """Generate Triton model configuration"""
+        model_repo_path = workspace / "model_repository" / config.model_name
+
+        # Generate config.pbtxt
+        if config.backend == TritonBackend.TENSORRT_LLM:
+            config_content = self._generate_tensorrt_llm_config(config)
+        elif config.backend == TritonBackend.PYTHON:
+            config_content = self._generate_python_backend_config(config)
+        else:
+            raise ValueError(f"Unsupported backend: {config.backend}")
+
+        # Write config file
+        with open(model_repo_path / "config.pbtxt", 'w') as f:
+            f.write(config_content)
+
+        # Copy model files to model repository
+        model_version_path = model_repo_path / config.model_version
+        if config.use_tensorrt:
+            # Copy engine files
+            import shutil
+            if (model_path / "model.engine").exists():
+                shutil.copy2(model_path / "model.engine", model_version_path)
+            else:
+                # Copy all engine files
+                for engine_file in model_path.glob("*.engine"):
+                    shutil.copy2(engine_file, model_version_path)
+        else:
+            # Copy HF model files
+            import shutil
+            shutil.copytree(model_path, model_version_path / "model", dirs_exist_ok=True)
+
+        logger.info(f"Triton configuration generated at: {model_repo_path}")
+
+    def _generate_tensorrt_llm_config(self, config: TritonConfig) -> str:
+        """Generate TensorRT-LLM backend configuration"""
+        return f'''name: "{config.model_name}"
+backend: "tensorrtllm"
+max_batch_size: {config.max_batch_size}
+
+{"decoupled: true" if config.enable_streaming else ""}
+
+input [
+  {{
+    name: "text_input"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }},
+  {{
+    name: "max_new_tokens"
+    data_type: TYPE_UINT32
+    dims: [ 1 ]
+    optional: true
+  }},
+  {{
+    name: "stream"
+    data_type: TYPE_BOOL
+    dims: [ 1 ]
+    optional: true
+  }},
+  {{
+    name: "temperature"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+    optional: true
+  }},
+  {{
+    name: "top_p"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+    optional: true
+  }}
+]
+
+output [
+  {{
+    name: "text_output"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }}
+]
+
+instance_group [
+  {{
+    count: {config.instance_group_count}
+    kind: {config.instance_group_kind}
+  }}
+]
+
+parameters {{
+  key: "model_type"
+  value: {{ string_value: "{"inflight_batching_llm" if config.use_inflight_batching else "llm"}" }}
+}}
+
+parameters {{
+  key: "max_tokens_in_paged_kv_cache"
+  value: {{ string_value: "{config.max_sequence_length * config.max_batch_size}" }}
+}}'''
+
+    def _generate_python_backend_config(self, config: TritonConfig) -> str:
+        """Generate Python backend configuration"""
+        return f'''name: "{config.model_name}"
+backend: "python"
+max_batch_size: {config.max_batch_size}
+
+input [
+  {{
+    name: "input"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }}
+]
+
+output [
+  {{
+    name: "output"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }}
+]
+
+instance_group [
+  {{
+    count: {config.instance_group_count}
+    kind: {config.instance_group_kind}
+  }}
+]'''
+
+    async def _deploy_container(self, config: TritonConfig, workspace: Path) -> Dict[str, Any]:
+        """Deploy Triton container"""
+        if not self.docker_client:
+            raise RuntimeError("Docker client required for container deployment")
+
+        # Generate docker-compose.yml
+        await self._generate_docker_compose(config, workspace)
+
+        # Deploy using docker-compose
+        compose_file = workspace / "docker-compose.yml"
+
+        try:
+            # Run docker-compose up
+            cmd = f"cd {workspace} && docker-compose up -d"
+            result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+
+            if result.returncode != 0:
+                raise RuntimeError(f"Docker compose failed: {result.stderr}")
+
+            logger.info("Triton container deployed successfully")
+
+            return {
+                "container_id": f"triton-{config.service_name}",
+                "compose_file": str(compose_file)
+            }
+
+        except Exception as e:
+            logger.error(f"Container deployment failed: {e}")
+            raise
+
+    async def _generate_docker_compose(self, config: TritonConfig, workspace: Path):
+        """Generate docker-compose.yml for Triton deployment"""
+        compose_content = f'''version: '3.8'
+
+services:
+  triton-{config.service_name}:
+    image: {config.container_image}
+    ports:
+      - "{config.http_port}:{config.http_port}"
+      - "{config.grpc_port}:{config.grpc_port}"
+      - "{config.metrics_port}:{config.metrics_port}"
+    volumes:
+      - ./model_repository:/models
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+{self._format_env_vars(config.environment)}
+    command: >
+      tritonserver
+      --model-repository=/models
+      --allow-http=true
+      --allow-grpc=true
+      --allow-metrics=true
+      --http-port={config.http_port}
+      --grpc-port={config.grpc_port}
+      --metrics-port={config.metrics_port}
+      --log-verbose=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: {config.gpu_count}
+              capabilities: [gpu]
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:{config.http_port}/v2/health/ready"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+'''
+
+        with open(workspace / "docker-compose.yml", 'w') as f:
+            f.write(compose_content)
+
+        logger.info("Docker compose configuration generated")
+
+    def _format_env_vars(self, env_vars: Dict[str, str]) -> str:
+        """Format environment variables for docker-compose"""
+        if not env_vars:
+            return ""
+
+        formatted = []
+        for key, value in env_vars.items():
+            # Indent to align with the environment list items in the compose template
+            formatted.append(f"      - {key}={value}")
+
+        return "\n" + "\n".join(formatted)
+
+    async def _verify_deployment(self, config: TritonConfig, container_info: Dict[str, Any]) -> str:
+        """Verify deployment is healthy"""
+        import requests
+
+        endpoint_url = f"http://localhost:{config.http_port}"
+        health_url = f"{endpoint_url}/v2/health/ready"
+
+        # Wait for service to be ready
+        max_retries = 30
+        for i in range(max_retries):
+            try:
+                response = requests.get(health_url, timeout=5)
+                if response.status_code == 200:
+                    logger.info("Triton service is healthy and ready")
+                    return endpoint_url
+            except Exception:
+                pass
+
+            if i < max_retries - 1:
+                logger.info(f"Waiting for Triton service... ({i+1}/{max_retries})")
+                # Non-blocking wait; time.sleep() here would stall the event loop
+                await asyncio.sleep(10)
+
+        raise RuntimeError("Triton service failed to become ready")
+
+    async def list_deployments(self) -> List[Dict[str, Any]]:
+        """List all Triton deployments"""
+        return [
+            {
+                "deployment_id": deployment_id,
+                **info
+            }
+            for deployment_id, info in self.deployments.items()
+        ]
+
+    async def delete_deployment(self, deployment_id: str) -> bool:
+        """Delete a Triton deployment"""
+        if deployment_id not in self.deployments:
+            return False
+
+        try:
+            deployment_info = self.deployments[deployment_id]
+            workspace = Path(deployment_info["workspace"])
+
+            # Stop docker-compose services
+            if (workspace / "docker-compose.yml").exists():
+                cmd = f"cd {workspace} && docker-compose down"
+                subprocess.run(cmd, shell=True, capture_output=True)
+
+            # Clean up workspace
+            import shutil
+            if workspace.exists():
+                shutil.rmtree(workspace)
+
+            # Remove from tracking
+            del self.deployments[deployment_id]
+
+            logger.info(f"Triton deployment deleted: {deployment_id}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to delete Triton deployment {deployment_id}: {e}")
+            return False
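
For orientation, here is a minimal sketch of how the new TritonProvider would be driven. The keyword arguments passed to TritonConfig are assumptions inferred from the attributes provider.py reads off the config object (service_name, model_id, model_name, model_version, backend, use_tensorrt, ...); config.py is not shown in this diff, so the actual constructor may differ, and the model ID is purely illustrative:

```python
import asyncio

from isa_model.deployment.triton.config import TritonConfig, TritonServiceType, TritonBackend
from isa_model.deployment.triton.provider import TritonProvider


async def main() -> None:
    # Hypothetical construction: field names mirror what the provider accesses.
    config = TritonConfig(
        service_name="demo-llm",
        service_type=TritonServiceType.LLM,
        model_id="Qwen/Qwen2-0.5B-Instruct",  # illustrative HF repo
        model_name="demo_llm",
        model_version="1",
        backend=TritonBackend.TENSORRT_LLM,
        use_tensorrt=True,
    )

    provider = TritonProvider(workspace_dir="./triton_deployments")
    result = await provider.deploy(config)  # runs the 6-step pipeline above
    print(result["endpoint_url"])           # e.g. http://localhost:<http_port>

    # Tear down: stops the compose stack and removes the workspace
    await provider.delete_deployment(result["deployment_id"])


asyncio.run(main())
```

Note that deploy() does not return until the container answers GET /v2/health/ready (Triton's KServe-v2 readiness route), retrying up to 30 times at 10-second intervals before raising.
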
isa_model/deployment/triton/scripts/__init__.py
@@ -0,0 +1 @@
+"""Triton deployment scripts"""
isa_model/deployment/triton/templates/__init__.py
@@ -0,0 +1 @@
+"""Triton deployment templates"""
isa_model/inference/__init__.py
@@ -8,4 +8,50 @@ This module provides the main inference components for the IsA Model system.
 from .ai_factory import AIFactory
 from .base import ModelType, Capability, RoutingStrategy
 
-__all__ = ["AIFactory", "ModelType", "Capability", "RoutingStrategy"]
+# Import legacy model services (migrated from isA_MCP)
+try:
+    from .legacy_services import (
+        ModelTrainingService,
+        TrainingConfig,
+        TrainingResult,
+        ModelEvaluationService,
+        EvaluationResult,
+        ModelServingService,
+        ServingResult,
+        ModelService,
+        ModelConfig,
+        ModelResult
+    )
+    LEGACY_SERVICES_AVAILABLE = True
+except ImportError:
+    LEGACY_SERVICES_AVAILABLE = False
+    ModelTrainingService = None
+    TrainingConfig = None
+    TrainingResult = None
+    ModelEvaluationService = None
+    EvaluationResult = None
+    ModelServingService = None
+    ServingResult = None
+    ModelService = None
+    ModelConfig = None
+    ModelResult = None
+
+__all__ = [
+    "AIFactory",
+    "ModelType",
+    "Capability",
+    "RoutingStrategy",
+
+    # Legacy model services (migrated from isA_MCP)
+    'ModelTrainingService',
+    'TrainingConfig',
+    'TrainingResult',
+    'ModelEvaluationService',
+    'EvaluationResult',
+    'ModelServingService',
+    'ServingResult',
+    'ModelService',
+    'ModelConfig',
+    'ModelResult',
+    'LEGACY_SERVICES_AVAILABLE'
+]
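
Because the legacy services are now imported behind a try/except, downstream code should feature-detect rather than import unconditionally. A minimal sketch (the no-argument ModelTrainingService construction is an assumption, since legacy_services internals are not shown in this hunk):

```python
from isa_model.inference import LEGACY_SERVICES_AVAILABLE, ModelTrainingService

if LEGACY_SERVICES_AVAILABLE:
    trainer = ModelTrainingService()  # hypothetical no-arg construction
else:
    # The names remain importable but are bound to None, so guard before use
    trainer = None
    print("legacy services unavailable; skipping training path")
```

This mirrors the fallback in __init__.py: on ImportError every legacy name is still exported but bound to None, so an unguarded call would fail with a TypeError rather than an ImportError.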