isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,475 @@
1
+ """
2
+ Deployments API Routes
3
+
4
+ Handles automated HuggingFace model deployment to Modal
5
+ """
6
+
7
+ from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
8
+ from pydantic import BaseModel
9
+ from typing import Optional, List, Dict, Any
10
+ import logging
11
+ import asyncio
12
+ import json
13
+ import time
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+
17
+ from isa_model.deployment.modal.deployer import ModalDeployer as HuggingFaceModalDeployer
18
+
19
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Router that collects the deployment endpoints defined in this module
router = APIRouter()
22
+
23
+ # Request/Response models
24
class AnalyzeModelRequest(BaseModel):
    """Request body for ``POST /analyze``."""

    # Model identifier passed to ``deployer.analyze_model``
    model_id: str
26
+
27
class DeployModelRequest(BaseModel):
    """Request body for ``POST /deploy``."""

    # Model identifier passed to ``deployer.deploy_model``
    model_id: str
    # Optional display name; stored on the tracking record and used as the
    # deployment's name in the listing endpoint
    service_name: Optional[str] = None
    # Forwarded to the deployer as its ``deploy`` flag
    auto_deploy: bool = False
31
+
32
class DeploymentResponse(BaseModel):
    """Response body for ``POST /deploy``.

    The deploy endpoint in this module only fills ``success``,
    ``deployment_id``, ``model_id`` and ``deployed``; the remaining fields
    keep their defaults there.
    """

    success: bool
    # Identifier of the tracking record created for this deployment
    deployment_id: Optional[str] = None
    model_id: str
    config: Optional[Dict[str, Any]] = None
    service_file: Optional[str] = None
    deployment_command: Optional[str] = None
    estimated_cost_per_hour: Optional[float] = None
    deployed: bool = False
    error: Optional[str] = None
42
+
43
# Global deployer instance, shared by every request handled by this module
deployer = HuggingFaceModalDeployer()

# In-memory deployment tracking (in production, use a database).
# Maps deployment_id -> mutable record dict; being process-local, the
# contents do not survive a restart.
deployments: Dict[str, Dict[str, Any]] = {}
48
+
49
@router.post("/analyze", response_model=Dict[str, Any])
async def analyze_model(request: AnalyzeModelRequest):
    """
    Analyze a HuggingFace model for deployment compatibility.

    Runs ``deployer.analyze_model`` on the requested model id and returns the
    resulting configuration attributes as a flat dict. Any exception is logged
    and reported to the client as HTTP 400.
    """
    # Attributes copied verbatim from the analysis result, in response order.
    reported_fields = (
        "model_id",
        "model_type",
        "architecture",
        "parameters",
        "gpu_requirements",
        "memory_gb",
        "container_memory_mb",
        "dependencies",
        "capabilities",
        "estimated_cost_per_hour",
    )

    try:
        logger.info(f"Analyzing model: {request.model_id}")

        analysis = deployer.analyze_model(request.model_id)

        payload = {"success": True}
        for field_name in reported_fields:
            payload[field_name] = getattr(analysis, field_name)
        return payload

    except Exception as e:
        logger.error(f"Model analysis failed for {request.model_id}: {e}")
        raise HTTPException(status_code=400, detail=f"Model analysis failed: {str(e)}")
77
+
78
@router.post("/deploy", response_model=DeploymentResponse)
async def deploy_model(request: DeployModelRequest, background_tasks: BackgroundTasks):
    """
    Queue a HuggingFace model deployment to Modal.

    Creates a "pending" tracking record, schedules ``perform_deployment`` as a
    background task and answers immediately with the new deployment id
    (``deployed`` is always False in this response). Failures while queuing
    are logged and reported as HTTP 500.
    """
    try:
        import uuid

        logger.info(f"Starting deployment for model: {request.model_id}")

        # Unique id: 8 random hex characters plus the current epoch second.
        random_part = uuid.uuid4().hex[:8]
        deployment_id = f"deploy_{random_part}_{int(time.time())}"

        # Register the record before scheduling so the task can find it.
        record = {
            "id": deployment_id,
            "model_id": request.model_id,
            "service_name": request.service_name,
            "status": "pending",
            "created_at": time.time(),
            "auto_deploy": request.auto_deploy
        }
        deployments[deployment_id] = record

        background_tasks.add_task(
            perform_deployment,
            deployment_id,
            request.model_id,
            request.service_name,
            request.auto_deploy,
        )

        response = DeploymentResponse(
            success=True,
            deployment_id=deployment_id,
            model_id=request.model_id,
            deployed=False,
        )
        return response

    except Exception as e:
        logger.error(f"Deployment initiation failed for {request.model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Deployment failed: {str(e)}")
120
+
121
async def perform_deployment(deployment_id: str, model_id: str, service_name: Optional[str], auto_deploy: bool):
    """
    Perform the actual deployment in the background and record the outcome.

    Args:
        deployment_id: Key of the tracking record in ``deployments``.
        model_id: Model identifier handed to ``deployer.deploy_model``.
        service_name: Accepted for interface compatibility; it is not
            forwarded to the deployer by this function.
        auto_deploy: Passed to the deployer as its ``deploy`` flag.

    The function never raises: every failure is logged and written to the
    tracking record as status "failed". A record that is missing, or that
    was cancelled before the task started, is left untouched.
    """
    record = deployments.get(deployment_id)
    if record is None:
        # Nothing to update; previously this raised KeyError a second time
        # inside the error handler.
        logger.error(f"Deployment {deployment_id} failed: no tracking record found")
        return

    if record.get("status") == "cancelled":
        # Honor a cancel request that arrived while the task was still queued
        # instead of overwriting it with "deploying".
        logger.info(f"Skipping deployment {deployment_id}: cancelled before start")
        return

    try:
        logger.info(f"Performing deployment {deployment_id} for model {model_id}")

        # Update status
        record["status"] = "deploying"
        record["progress"] = "Analyzing model"

        # NOTE(review): this is a synchronous call inside a coroutine, so it
        # presumably blocks the event loop while the deployer runs - confirm
        # whether it should be moved to a worker thread.
        result = deployer.deploy_model(model_id, deploy=auto_deploy)

        if result["success"]:
            deployed = result.get("deployed", False)
            record.update({
                # "generated" means the service file was produced but not deployed
                "status": "completed" if deployed else "generated",
                "progress": "Deployment completed",
                "config": result["config"],
                "service_file": result["service_file"],
                "deployment_command": result["deployment_command"],
                "estimated_cost_per_hour": result["estimated_cost_per_hour"],
                "deployed": deployed,
                "completed_at": time.time()
            })
        else:
            record.update({
                "status": "failed",
                "progress": "Deployment failed",
                "error": result.get("error", "Unknown error"),
                "failed_at": time.time()
            })

    except Exception as e:
        logger.error(f"Deployment {deployment_id} failed: {e}")
        record.update({
            "status": "failed",
            "progress": "Deployment failed",
            "error": str(e),
            "failed_at": time.time()
        })
164
+
165
@router.get("/")
async def list_deployments():
    """
    List all tracked deployments.

    Each tracking record is projected onto a summary dict. ``model_type``,
    ``gpu`` and ``cost_per_hour`` are fixed placeholder values, not read from
    the record. When nothing is tracked yet, two hard-coded demo entries are
    returned instead. Unexpected errors are logged and reported as HTTP 500.
    """
    try:
        summaries = [
            {
                "id": dep_id,
                "name": record.get("service_name") or f"{record['model_id'].split('/')[-1]} Service",
                "model_id": record["model_id"],
                "model_type": "text",  # Would be determined from analysis
                "status": record["status"],
                "gpu": "A10G",  # Would be from config
                "cost_per_hour": "1.20",  # Would be from config
                "created_at": record["created_at"],
                "deployed_at": record.get("completed_at"),
                "error": record.get("error")
            }
            for dep_id, record in deployments.items()
        ]

        if summaries:
            return summaries

        # Fallback deployments for demo purposes when nothing is tracked
        return [
            {
                "id": "qwen2-vl-7b",
                "name": "Qwen2.5-VL Service",
                "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
                "model_type": "vision",
                "status": "active",
                "gpu": "A100",
                "cost_per_hour": "4.00",
                "created_at": 1705312200,
                "deployed_at": 1705312800
            },
            {
                "id": "embed-service",
                "name": "BGE Embed Service",
                "model_id": "BAAI/bge-base-en-v1.5",
                "model_type": "embedding",
                "status": "active",
                "gpu": "A10G",
                "cost_per_hour": "1.20",
                "created_at": 1705225800,
                "deployed_at": 1705226400
            }
        ]

    except Exception as e:
        logger.error(f"Failed to list deployments: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list deployments: {str(e)}")
220
+
221
@router.get("/{deployment_id}")
async def get_deployment(deployment_id: str):
    """
    Return the raw tracking record for one deployment.

    Responds with HTTP 404 when the id is not tracked; any other failure is
    logged and reported as HTTP 500.
    """
    try:
        return deployments[deployment_id]
    except KeyError:
        # Unknown id: same 404 as an explicit membership check would give
        raise HTTPException(status_code=404, detail="Deployment not found") from None
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
237
+
238
@router.get("/{deployment_id}/status")
async def get_deployment_status(deployment_id: str, request: Request):
    """
    Get real-time deployment status and monitoring information with tenant isolation.

    The tenant context (if any) is passed to ``DeploymentManager.get_deployment``
    to check access before the Modal service status is fetched.

    Raises:
        HTTPException: 404 when the deployment is missing or not accessible
            to the current tenant; 500 for any other failure.
    """
    try:
        from isa_model.deployment.core.deployment_manager import DeploymentManager
        from isa_model.serving.api.middleware.tenant_context import get_tenant_context

        # Get tenant context for isolation
        tenant_context = get_tenant_context()
        tenant_dict = {
            "organization_id": tenant_context.organization_id,
            "user_id": tenant_context.user_id,
            "role": tenant_context.role
        } if tenant_context else None

        # Initialize deployment manager
        manager = DeploymentManager()

        # Verify tenant access to deployment first
        deployment = await manager.get_deployment(deployment_id, tenant_dict)
        if not deployment:
            raise HTTPException(status_code=404, detail="Deployment not found or access denied")

        # Get deployment status
        status_info = await manager.get_modal_service_status(deployment_id)

        return {
            "success": True,
            "deployment_status": status_info
        }

    except HTTPException:
        # Let the deliberate 404 through; without this clause the generic
        # handler below turned it into a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get deployment status {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}")
274
+
275
@router.get("/{deployment_id}/monitoring")
async def get_deployment_monitoring(deployment_id: str, request: Request):
    """
    Return monitoring metrics for a Modal deployment, with tenant isolation.

    Access is checked through ``DeploymentManager.get_deployment`` using the
    current tenant context. Responds with HTTP 404 when the deployment is
    missing or inaccessible, or when the status lookup reports "not_found";
    other failures are logged and reported as HTTP 500.
    """
    try:
        from isa_model.deployment.core.deployment_manager import DeploymentManager
        from isa_model.serving.api.middleware.tenant_context import get_tenant_context

        # Build the tenant filter only when a tenant context is present
        ctx = get_tenant_context()
        tenant_dict = None
        if ctx:
            tenant_dict = {
                "organization_id": ctx.organization_id,
                "user_id": ctx.user_id,
                "role": ctx.role
            }

        manager = DeploymentManager()

        # Access check comes first, before any status is fetched
        if not await manager.get_deployment(deployment_id, tenant_dict):
            raise HTTPException(status_code=404, detail="Deployment not found or access denied")

        status_info = await manager.get_modal_service_status(deployment_id)
        if status_info.get("status") == "not_found":
            raise HTTPException(status_code=404, detail="Deployment not found")

        # Copy the known metric sections; absent ones come back as None
        raw_metrics = status_info.get("monitoring", {})
        metric_names = ("health_check", "resource_usage", "request_metrics", "cost_tracking")
        monitoring = {metric: raw_metrics.get(metric) for metric in metric_names}
        monitoring["last_updated"] = datetime.now().isoformat()

        return {
            "success": True,
            "deployment_id": deployment_id,
            "monitoring": monitoring
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get monitoring data {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get monitoring data: {str(e)}")
324
+
325
@router.post("/{deployment_id}/restart")
async def restart_deployment(deployment_id: str, request: Request):
    """
    Restart a Modal deployment, with tenant isolation.

    The restart itself is simulated: the status is set to "restarting", the
    handler waits one second, then sets it to "running". Responds with
    HTTP 404 when the deployment is missing or inaccessible; other failures
    are logged and reported as HTTP 500.
    """
    try:
        from isa_model.deployment.core.deployment_manager import DeploymentManager
        from isa_model.serving.api.middleware.tenant_context import get_tenant_context

        # Build the tenant filter only when a tenant context is present
        ctx = get_tenant_context()
        tenant_dict = None
        if ctx:
            tenant_dict = {
                "organization_id": ctx.organization_id,
                "user_id": ctx.user_id,
                "role": ctx.role
            }

        manager = DeploymentManager()

        # The deployment must exist and be visible to this tenant
        existing = await manager.get_deployment(deployment_id, tenant_dict)
        if not existing:
            raise HTTPException(status_code=404, detail="Deployment not found or access denied")

        await manager.update_deployment_status(deployment_id, "restarting")

        # TODO: Implement actual Modal service restart
        # For now, simulate restart process
        await asyncio.sleep(1)

        await manager.update_deployment_status(deployment_id, "running")

        outcome = {
            "success": True,
            "message": f"Deployment {deployment_id} restarted successfully",
            "deployment_id": deployment_id,
            "status": "running"
        }
        return outcome

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to restart deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to restart deployment: {str(e)}")
371
+
372
@router.delete("/{deployment_id}")
async def cancel_deployment(deployment_id: str):
    """
    Cancel a deployment that is still pending.

    Responds with HTTP 404 for an unknown id and HTTP 400 when the deployment
    is in any status other than "pending". On success the record is marked
    "cancelled" and time-stamped.
    """
    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        record = deployments[deployment_id]

        # Guard clause: only a pending deployment may be cancelled
        if record["status"] != "pending":
            raise HTTPException(status_code=400, detail="Cannot cancel deployment in current status")

        record["status"] = "cancelled"
        record["cancelled_at"] = time.time()
        return {"success": True, "message": "Deployment cancelled"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to cancel deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to cancel deployment: {str(e)}")
395
+
396
@router.post("/{deployment_id}/retry")
async def retry_deployment(deployment_id: str, background_tasks: BackgroundTasks):
    """
    Re-queue a deployment whose previous attempt failed.

    Responds with HTTP 404 for an unknown id and HTTP 400 when the deployment
    is in any status other than "failed". On success the record is reset to
    "pending" and ``perform_deployment`` is scheduled again with the stored
    parameters.
    """
    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        record = deployments[deployment_id]

        # Guard clause: only a failed deployment may be retried
        if record["status"] != "failed":
            raise HTTPException(status_code=400, detail="Cannot retry deployment in current status")

        # Reset the record before handing it back to the background worker
        record["status"] = "pending"
        record["error"] = None
        record["progress"] = "Retrying deployment"

        background_tasks.add_task(
            perform_deployment,
            deployment_id,
            record["model_id"],
            record.get("service_name"),
            record.get("auto_deploy", False),
        )

        return {"success": True, "message": "Deployment retry started"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to retry deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to retry deployment: {str(e)}")
431
+
432
@router.get("/{deployment_id}/code")
async def get_service_code(deployment_id: str):
    """
    Download the generated service code for a deployment.

    Returns the file recorded under the deployment's ``service_file`` key as
    a plain-text attachment.

    Raises:
        HTTPException: 404 when the deployment is unknown, has no recorded
            service file, or the recorded path is not a regular file; 500
            for any other failure (for example an unreadable file).
    """
    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        deployment = deployments[deployment_id]
        service_file = deployment.get("service_file")

        # is_file() rather than exists(): a directory at that path used to
        # pass the check and then fail in open(), producing a 500.
        service_path = Path(service_file) if service_file else None
        if service_path is None or not service_path.is_file():
            raise HTTPException(status_code=404, detail="Service code not found")

        # Read with an explicit encoding so the result does not depend on the
        # host's locale. Assumes the generated file is UTF-8 (the default
        # for Python source) - TODO confirm against the deployer's writer.
        service_code = service_path.read_text(encoding="utf-8")

        from fastapi.responses import PlainTextResponse
        return PlainTextResponse(
            content=service_code,
            headers={
                "Content-Disposition": f"attachment; filename={service_path.name}"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get service code for {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get service code: {str(e)}")
464
+
465
# Health check for deployments service
# NOTE(review): this route is registered after GET "/{deployment_id}", so a
# request for "/health" is presumably matched by that route first - confirm
# the registration order that is intended.
@router.get("/health")
async def deployments_health():
    """Report service liveness together with counts of tracked deployments."""
    # NOTE(review): no code in this module assigns the status "active", so
    # the active count looks like it is always 0 - verify the intended status.
    statuses = [record["status"] for record in deployments.values()]
    return {
        "status": "healthy",
        "service": "deployments",
        "active_deployments": statuses.count("active"),
        "pending_deployments": statuses.count("pending"),
        "total_deployments": len(deployments)
    }