isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,586 @@
1
+ """
2
+ Local GPU deployment provider
3
+
4
+ Unified provider for local GPU model deployment with support for multiple backends:
5
+ - vLLM for high-performance LLM inference
6
+ - TensorRT-LLM for maximum optimization
7
+ - HuggingFace Transformers for universal compatibility
8
+ """
9
+
10
+ import os
11
+ import json
12
+ import logging
13
+ import asyncio
14
+ from typing import Dict, List, Optional, Any, Union
15
+ from pathlib import Path
16
+ from datetime import datetime
17
+
18
+ from .config import LocalGPUConfig, LocalServiceType, LocalBackend
19
+ from .vllm_service import VLLMService
20
+ from .tensorrt_service import TensorRTLLMService
21
+ from .transformers_service import TransformersService
22
+ from .health_checker import get_health_checker, ServiceStatus
23
+ from ...utils.gpu_utils import get_gpu_manager
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class LocalGPUProvider:
    """
    Unified local GPU deployment provider.

    This provider manages local GPU model deployments with support for:
    - Multiple inference backends (vLLM, TensorRT-LLM, Transformers)
    - Automatic GPU resource management
    - Service health monitoring
    - Performance optimization

    Deployment state is tracked in memory (``services``, ``configs``,
    ``deployments``) and a minimal snapshot is persisted to
    ``service_registry.json`` inside the workspace directory.

    Example:
        ```python
        from isa_model.deployment.local import LocalGPUProvider, create_vllm_config

        # Initialize provider
        provider = LocalGPUProvider()

        # Create service configuration
        config = create_vllm_config(
            service_name="llama2-7b",
            model_id="meta-llama/Llama-2-7b-chat-hf"
        )

        # Deploy service
        result = await provider.deploy(config)
        print(f"Service deployed: {result['service_url']}")

        # Use the service
        response = await provider.generate_text(
            service_name="llama2-7b",
            prompt="Hello, how are you?"
        )
        ```
    """
62
+
63
    def __init__(self, workspace_dir: str = "./local_deployments"):
        """
        Initialize local GPU provider.

        Creates the workspace directory if needed, obtains the shared GPU and
        health-checker managers, initializes the in-memory tracking maps, and
        loads the persisted service registry (informational only — services
        are not automatically re-deployed on startup; see ``_load_registry``).

        Args:
            workspace_dir: Directory for deployment artifacts and logs
        """
        self.workspace_dir = Path(workspace_dir)
        self.workspace_dir.mkdir(parents=True, exist_ok=True)

        # Component managers
        self.gpu_manager = get_gpu_manager()
        self.health_checker = get_health_checker()

        # Service tracking
        self.services: Dict[str, Any] = {}  # service_name -> service instance
        self.configs: Dict[str, LocalGPUConfig] = {}  # service_name -> config
        self.deployments: Dict[str, Dict[str, Any]] = {}  # deployment tracking

        # Service registry file (snapshot written by _save_registry)
        self.registry_file = self.workspace_dir / "service_registry.json"
        self._load_registry()

        logger.info("Local GPU provider initialized")
        logger.info(f"Workspace directory: {self.workspace_dir}")
        logger.info(f"Available GPUs: {len(self.gpu_manager.gpus)}")
89
+
90
+ async def deploy(self, config: LocalGPUConfig) -> Dict[str, Any]:
91
+ """
92
+ Deploy a model service with the specified configuration.
93
+
94
+ Args:
95
+ config: Local GPU deployment configuration
96
+
97
+ Returns:
98
+ Deployment result with service information
99
+ """
100
+ service_name = config.service_name
101
+
102
+ logger.info("=" * 60)
103
+ logger.info(f"STARTING LOCAL DEPLOYMENT: {service_name}")
104
+ logger.info(f"MODEL: {config.model_id}")
105
+ logger.info(f"BACKEND: {config.backend.value}")
106
+ logger.info("=" * 60)
107
+
108
+ try:
109
+ # Check if service already exists
110
+ if service_name in self.services:
111
+ return {
112
+ "success": False,
113
+ "error": f"Service {service_name} already deployed",
114
+ "existing_service": self.get_service_info(service_name)
115
+ }
116
+
117
+ # Validate configuration
118
+ validation_result = await self._validate_config(config)
119
+ if not validation_result["valid"]:
120
+ return {
121
+ "success": False,
122
+ "error": f"Configuration validation failed: {validation_result['error']}",
123
+ "validation_details": validation_result
124
+ }
125
+
126
+ # Create service instance
127
+ service = await self._create_service(config)
128
+ if not service:
129
+ return {
130
+ "success": False,
131
+ "error": f"Failed to create service for backend: {config.backend.value}"
132
+ }
133
+
134
+ # Deploy based on backend type
135
+ deployment_start_time = datetime.now()
136
+
137
+ if config.backend == LocalBackend.VLLM:
138
+ deploy_result = await self._deploy_vllm_service(service, config)
139
+ elif config.backend == LocalBackend.TENSORRT_LLM:
140
+ deploy_result = await self._deploy_tensorrt_service(service, config)
141
+ elif config.backend == LocalBackend.TRANSFORMERS:
142
+ deploy_result = await self._deploy_transformers_service(service, config)
143
+ else:
144
+ return {
145
+ "success": False,
146
+ "error": f"Unsupported backend: {config.backend.value}"
147
+ }
148
+
149
+ if deploy_result["success"]:
150
+ # Register service
151
+ self.services[service_name] = service
152
+ self.configs[service_name] = config
153
+
154
+ # Track deployment
155
+ deployment_info = {
156
+ "service_name": service_name,
157
+ "config": config.to_dict(),
158
+ "backend": config.backend.value,
159
+ "deployed_at": deployment_start_time.isoformat(),
160
+ "status": "deployed",
161
+ **deploy_result
162
+ }
163
+ self.deployments[service_name] = deployment_info
164
+
165
+ # Register with health checker
166
+ self.health_checker.register_service(service_name, service)
167
+ await self.health_checker.start_monitoring(service_name)
168
+
169
+ # Save registry
170
+ self._save_registry()
171
+
172
+ logger.info("=" * 60)
173
+ logger.info("LOCAL DEPLOYMENT COMPLETED SUCCESSFULLY!")
174
+ logger.info("=" * 60)
175
+ logger.info(f"Service: {service_name}")
176
+ logger.info(f"Backend: {config.backend.value}")
177
+
178
+ return {
179
+ "success": True,
180
+ "service_name": service_name,
181
+ "backend": config.backend.value,
182
+ "deployment_info": deployment_info,
183
+ **deploy_result
184
+ }
185
+ else:
186
+ return deploy_result
187
+
188
+ except Exception as e:
189
+ logger.error("=" * 60)
190
+ logger.error("LOCAL DEPLOYMENT FAILED!")
191
+ logger.error("=" * 60)
192
+ logger.error(f"Error: {e}")
193
+
194
+ return {
195
+ "success": False,
196
+ "error": str(e),
197
+ "service_name": service_name
198
+ }
199
+
200
+ async def undeploy(self, service_name: str) -> Dict[str, Any]:
201
+ """
202
+ Stop and remove a deployed service.
203
+
204
+ Args:
205
+ service_name: Name of service to undeploy
206
+
207
+ Returns:
208
+ Undeploy result
209
+ """
210
+ if service_name not in self.services:
211
+ return {
212
+ "success": False,
213
+ "error": f"Service {service_name} not found"
214
+ }
215
+
216
+ try:
217
+ logger.info(f"Undeploying service: {service_name}")
218
+
219
+ service = self.services[service_name]
220
+
221
+ # Stop monitoring
222
+ await self.health_checker.stop_monitoring(service_name)
223
+ self.health_checker.unregister_service(service_name)
224
+
225
+ # Stop service
226
+ if hasattr(service, 'stop'):
227
+ stop_result = await service.stop()
228
+ elif hasattr(service, 'unload_model'):
229
+ stop_result = await service.unload_model()
230
+ else:
231
+ stop_result = {"success": True}
232
+
233
+ # Clean up
234
+ if hasattr(service, 'cleanup'):
235
+ await service.cleanup()
236
+
237
+ # Remove from tracking
238
+ del self.services[service_name]
239
+ del self.configs[service_name]
240
+ if service_name in self.deployments:
241
+ del self.deployments[service_name]
242
+
243
+ # Save registry
244
+ self._save_registry()
245
+
246
+ logger.info(f"Service undeployed: {service_name}")
247
+
248
+ return {
249
+ "success": True,
250
+ "service_name": service_name,
251
+ "stop_result": stop_result
252
+ }
253
+
254
+ except Exception as e:
255
+ logger.error(f"Failed to undeploy service {service_name}: {e}")
256
+ return {
257
+ "success": False,
258
+ "error": str(e)
259
+ }
260
+
261
+ async def list_services(self) -> List[Dict[str, Any]]:
262
+ """List all deployed services"""
263
+ services = []
264
+
265
+ for service_name, service in self.services.items():
266
+ try:
267
+ config = self.configs[service_name]
268
+ health = await self.health_checker.check_service_health(service_name)
269
+ metrics = self.health_checker.get_service_metrics(service_name)
270
+
271
+ service_info = {
272
+ "service_name": service_name,
273
+ "model_id": config.model_id,
274
+ "backend": config.backend.value,
275
+ "service_type": config.service_type.value,
276
+ "status": health.get("status", "unknown"),
277
+ "healthy": health.get("healthy", False),
278
+ "response_time_ms": health.get("response_time_ms"),
279
+ "error_count": metrics.error_count if metrics else 0,
280
+ "uptime_seconds": metrics.uptime_seconds if metrics else None,
281
+ "deployed_at": self.deployments.get(service_name, {}).get("deployed_at")
282
+ }
283
+
284
+ # Add service-specific info
285
+ if hasattr(service, 'get_service_info'):
286
+ service_info.update(service.get_service_info())
287
+
288
+ services.append(service_info)
289
+
290
+ except Exception as e:
291
+ logger.error(f"Error getting info for service {service_name}: {e}")
292
+ services.append({
293
+ "service_name": service_name,
294
+ "status": "error",
295
+ "error": str(e)
296
+ })
297
+
298
+ return services
299
+
300
+ async def get_service_info(self, service_name: str) -> Optional[Dict[str, Any]]:
301
+ """Get detailed information about a specific service"""
302
+ if service_name not in self.services:
303
+ return None
304
+
305
+ try:
306
+ service = self.services[service_name]
307
+ config = self.configs[service_name]
308
+ health = await self.health_checker.check_service_health(service_name)
309
+ metrics = self.health_checker.get_service_metrics(service_name)
310
+
311
+ info = {
312
+ "service_name": service_name,
313
+ "config": config.to_dict(),
314
+ "health": health,
315
+ "metrics": {
316
+ "status": metrics.status.value if metrics else "unknown",
317
+ "last_check": metrics.last_check.isoformat() if metrics else None,
318
+ "error_count": metrics.error_count if metrics else 0,
319
+ "consecutive_failures": metrics.consecutive_failures if metrics else 0,
320
+ "uptime_seconds": metrics.uptime_seconds if metrics else None,
321
+ "last_error": metrics.last_error if metrics else None
322
+ } if metrics else {},
323
+ "deployment_info": self.deployments.get(service_name, {})
324
+ }
325
+
326
+ # Add service-specific info
327
+ if hasattr(service, 'get_service_info'):
328
+ info["service_details"] = service.get_service_info()
329
+
330
+ return info
331
+
332
+ except Exception as e:
333
+ logger.error(f"Error getting service info for {service_name}: {e}")
334
+ return {
335
+ "service_name": service_name,
336
+ "error": str(e)
337
+ }
338
+
339
+ async def generate_text(self, service_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
340
+ """Generate text using a deployed service"""
341
+ if service_name not in self.services:
342
+ return {
343
+ "success": False,
344
+ "error": f"Service {service_name} not found"
345
+ }
346
+
347
+ try:
348
+ service = self.services[service_name]
349
+
350
+ # Check service health
351
+ health = await self.health_checker.check_service_health(service_name)
352
+ if not health.get("healthy", False):
353
+ return {
354
+ "success": False,
355
+ "error": f"Service {service_name} is not healthy: {health.get('error', 'Unknown error')}"
356
+ }
357
+
358
+ # Generate text
359
+ if hasattr(service, 'generate'):
360
+ return await service.generate(prompt, **kwargs)
361
+ elif hasattr(service, 'generate_text'):
362
+ return await service.generate_text(prompt, **kwargs)
363
+ else:
364
+ return {
365
+ "success": False,
366
+ "error": f"Service {service_name} does not support text generation"
367
+ }
368
+
369
+ except Exception as e:
370
+ logger.error(f"Text generation failed for service {service_name}: {e}")
371
+ return {
372
+ "success": False,
373
+ "error": str(e)
374
+ }
375
+
376
+ async def chat_completion(self, service_name: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
377
+ """Generate chat completion using a deployed service"""
378
+ if service_name not in self.services:
379
+ return {
380
+ "success": False,
381
+ "error": f"Service {service_name} not found"
382
+ }
383
+
384
+ try:
385
+ service = self.services[service_name]
386
+
387
+ # Check service health
388
+ health = await self.health_checker.check_service_health(service_name)
389
+ if not health.get("healthy", False):
390
+ return {
391
+ "success": False,
392
+ "error": f"Service {service_name} is not healthy"
393
+ }
394
+
395
+ # Generate chat completion
396
+ if hasattr(service, 'chat_completions'):
397
+ return await service.chat_completions(messages, **kwargs)
398
+ elif hasattr(service, 'chat_completion'):
399
+ return await service.chat_completion(messages, **kwargs)
400
+ else:
401
+ return {
402
+ "success": False,
403
+ "error": f"Service {service_name} does not support chat completion"
404
+ }
405
+
406
+ except Exception as e:
407
+ logger.error(f"Chat completion failed for service {service_name}: {e}")
408
+ return {
409
+ "success": False,
410
+ "error": str(e)
411
+ }
412
+
413
+ async def get_system_status(self) -> Dict[str, Any]:
414
+ """Get overall system status"""
415
+ system_health = self.health_checker.get_system_health()
416
+
417
+ return {
418
+ **system_health,
419
+ "provider": "local_gpu",
420
+ "workspace_dir": str(self.workspace_dir),
421
+ "total_deployments": len(self.services),
422
+ "available_backends": [backend.value for backend in LocalBackend],
423
+ "gpu_status": {
424
+ "cuda_available": self.gpu_manager.cuda_available,
425
+ "nvidia_smi_available": self.gpu_manager.nvidia_smi_available,
426
+ "gpu_count": len(self.gpu_manager.gpus)
427
+ }
428
+ }
429
+
430
+ async def _validate_config(self, config: LocalGPUConfig) -> Dict[str, Any]:
431
+ """Validate deployment configuration"""
432
+ try:
433
+ # Check GPU requirements
434
+ compatibility = self.gpu_manager.check_gpu_compatibility(
435
+ config.model_id,
436
+ config.model_precision
437
+ )
438
+
439
+ if not compatibility[0]:
440
+ return {
441
+ "valid": False,
442
+ "error": f"GPU compatibility check failed: {', '.join(compatibility[1])}"
443
+ }
444
+
445
+ # Check backend availability
446
+ backend_available = await self._check_backend_availability(config.backend)
447
+ if not backend_available["available"]:
448
+ return {
449
+ "valid": False,
450
+ "error": f"Backend {config.backend.value} not available: {backend_available['error']}"
451
+ }
452
+
453
+ # Check port availability
454
+ if config.backend == LocalBackend.VLLM:
455
+ port_available = await self._check_port_available(config.port)
456
+ if not port_available:
457
+ return {
458
+ "valid": False,
459
+ "error": f"Port {config.port} is not available"
460
+ }
461
+
462
+ return {
463
+ "valid": True,
464
+ "gpu_compatibility": compatibility,
465
+ "backend_check": backend_available
466
+ }
467
+
468
+ except Exception as e:
469
+ return {
470
+ "valid": False,
471
+ "error": str(e)
472
+ }
473
+
474
+ async def _check_backend_availability(self, backend: LocalBackend) -> Dict[str, Any]:
475
+ """Check if a backend is available"""
476
+ try:
477
+ if backend == LocalBackend.VLLM:
478
+ try:
479
+ import vllm
480
+ return {"available": True}
481
+ except ImportError:
482
+ return {"available": False, "error": "vLLM not installed"}
483
+
484
+ elif backend == LocalBackend.TENSORRT_LLM:
485
+ try:
486
+ import tensorrt_llm
487
+ return {"available": True}
488
+ except ImportError:
489
+ return {"available": False, "error": "TensorRT-LLM not installed"}
490
+
491
+ elif backend == LocalBackend.TRANSFORMERS:
492
+ try:
493
+ import transformers
494
+ return {"available": True}
495
+ except ImportError:
496
+ return {"available": False, "error": "Transformers not installed"}
497
+
498
+ else:
499
+ return {"available": False, "error": f"Unknown backend: {backend.value}"}
500
+
501
+ except Exception as e:
502
+ return {"available": False, "error": str(e)}
503
+
504
+ async def _check_port_available(self, port: int) -> bool:
505
+ """Check if a port is available"""
506
+ try:
507
+ import socket
508
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
509
+ result = s.connect_ex(('127.0.0.1', port))
510
+ return result != 0 # Port is available if connection fails
511
+ except:
512
+ return False
513
+
514
+ async def _create_service(self, config: LocalGPUConfig) -> Optional[Any]:
515
+ """Create service instance based on backend"""
516
+ try:
517
+ if config.backend == LocalBackend.VLLM:
518
+ return VLLMService(config)
519
+ elif config.backend == LocalBackend.TENSORRT_LLM:
520
+ return TensorRTLLMService(config)
521
+ elif config.backend == LocalBackend.TRANSFORMERS:
522
+ return TransformersService(config)
523
+ else:
524
+ logger.error(f"Unsupported backend: {config.backend.value}")
525
+ return None
526
+
527
+ except Exception as e:
528
+ logger.error(f"Failed to create service: {e}")
529
+ return None
530
+
531
+ async def _deploy_vllm_service(self, service: VLLMService, config: LocalGPUConfig) -> Dict[str, Any]:
532
+ """Deploy vLLM service"""
533
+ result = await service.start()
534
+ if result["success"]:
535
+ return {
536
+ **result,
537
+ "service_url": f"http://{config.host}:{config.port}",
538
+ "api_base": f"http://{config.host}:{config.port}/v1"
539
+ }
540
+ return result
541
+
542
+ async def _deploy_tensorrt_service(self, service: TensorRTLLMService, config: LocalGPUConfig) -> Dict[str, Any]:
543
+ """Deploy TensorRT-LLM service"""
544
+ # Build engine first
545
+ build_result = await service.build_engine()
546
+ if not build_result["success"]:
547
+ return build_result
548
+
549
+ # Load model
550
+ load_result = await service.load_model()
551
+ return load_result
552
+
553
+ async def _deploy_transformers_service(self, service: TransformersService, config: LocalGPUConfig) -> Dict[str, Any]:
554
+ """Deploy Transformers service"""
555
+ return await service.load_model()
556
+
557
+ def _load_registry(self):
558
+ """Load service registry from file"""
559
+ if self.registry_file.exists():
560
+ try:
561
+ with open(self.registry_file, 'r') as f:
562
+ registry_data = json.load(f)
563
+
564
+ # Note: We don't automatically reload services on startup
565
+ # This would require more complex state management
566
+ logger.info(f"Service registry loaded: {len(registry_data)} entries")
567
+
568
+ except Exception as e:
569
+ logger.warning(f"Failed to load service registry: {e}")
570
+
571
+ def _save_registry(self):
572
+ """Save service registry to file"""
573
+ try:
574
+ registry_data = {}
575
+ for service_name, deployment in self.deployments.items():
576
+ registry_data[service_name] = {
577
+ "config": deployment["config"],
578
+ "deployed_at": deployment["deployed_at"],
579
+ "backend": deployment["backend"]
580
+ }
581
+
582
+ with open(self.registry_file, 'w') as f:
583
+ json.dump(registry_data, f, indent=2)
584
+
585
+ except Exception as e:
586
+ logger.error(f"Failed to save service registry: {e}")