isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/deployment_billing.py (new file)
@@ -0,0 +1,315 @@
+"""
+Deployment Billing API Routes
+
+API endpoints for deployment cost estimation, tracking, and billing information.
+"""
+
+from fastapi import APIRouter, HTTPException, Query, Depends
+from typing import Dict, Any, Optional, List
+from datetime import datetime, timedelta
+import logging
+from pydantic import BaseModel
+
+from ..auth import optional_auth
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/deployment", tags=["deployment-billing"])
+
+
+class CostEstimationRequest(BaseModel):
+    """Request model for deployment cost estimation"""
+    provider: str
+    gpu_type: str
+    gpu_count: int = 1
+    estimated_hours: float = 1.0
+    operation_type: str = "deployment"
+
+
+class DeploymentBillingQuery(BaseModel):
+    """Query parameters for deployment billing"""
+    start_date: Optional[str] = None
+    end_date: Optional[str] = None
+    provider: Optional[str] = None
+    gpu_type: Optional[str] = None
+    model_id: Optional[str] = None
+
+
+@router.post("/estimate-cost")
+async def estimate_deployment_cost(
+    request: CostEstimationRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Estimate deployment costs before starting deployment
+
+    Returns cost breakdown for specified provider, GPU type, and duration.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        cost_estimate = billing_tracker.estimate_deployment_cost(
+            provider=request.provider,
+            gpu_type=request.gpu_type,
+            gpu_count=request.gpu_count,
+            estimated_hours=request.estimated_hours,
+            operation_type=request.operation_type
+        )
+
+        # Add additional cost breakdown details
+        hourly_rate = cost_estimate["compute_cost"] / request.estimated_hours if request.estimated_hours > 0 else 0
+
+        return {
+            "success": True,
+            "estimation": {
+                "provider": request.provider,
+                "gpu_type": request.gpu_type,
+                "gpu_count": request.gpu_count,
+                "estimated_hours": request.estimated_hours,
+                "cost_breakdown": cost_estimate,
+                "hourly_rate": round(hourly_rate, 6),
+                "recommendations": _get_cost_recommendations(request.provider, request.gpu_type, cost_estimate)
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to estimate deployment cost: {e}")
+        raise HTTPException(status_code=500, detail=f"Cost estimation failed: {str(e)}")
+
+
+@router.get("/billing/summary")
+async def get_deployment_billing_summary(
+    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
+    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    gpu_type: Optional[str] = Query(None, description="Filter by GPU type"),
+    model_id: Optional[str] = Query(None, description="Filter by model ID"),
+    user = Depends(optional_auth)
+):
+    """
+    Get deployment billing summary with optional filters
+
+    Returns comprehensive billing information for deployments within specified period.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        # Parse dates
+        start_dt = None
+        end_dt = None
+        if start_date:
+            start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+        if end_date:
+            end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+
+        billing_tracker = get_deployment_billing_tracker()
+
+        # Get deployment summary
+        deployment_summary = billing_tracker.get_deployment_summary(
+            start_date=start_dt,
+            end_date=end_dt,
+            provider=provider,
+            gpu_type=gpu_type
+        )
+
+        # If model_id filter is specified, get model-specific data
+        model_summary = None
+        if model_id:
+            model_summary = billing_tracker.get_model_usage_summary(model_id)
+
+        return {
+            "success": True,
+            "filters": {
+                "start_date": start_date,
+                "end_date": end_date,
+                "provider": provider,
+                "gpu_type": gpu_type,
+                "model_id": model_id
+            },
+            "deployment_summary": deployment_summary,
+            "model_summary": model_summary,
+            "recommendations": _get_billing_recommendations(deployment_summary)
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment billing summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get billing summary: {str(e)}")
+
+
+@router.get("/pricing")
+async def get_deployment_pricing(
+    user = Depends(optional_auth)
+):
+    """
+    Get current deployment pricing for all providers and GPU types
+
+    Returns up-to-date pricing information for cost planning.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        pricing_data = billing_tracker.pricing_data
+
+        # Add provider descriptions and recommendations
+        enhanced_pricing = {}
+        for provider, pricing in pricing_data.items():
+            enhanced_pricing[provider] = {
+                "pricing": pricing,
+                "description": _get_provider_description(provider),
+                "best_for": _get_provider_recommendations(provider),
+                "availability": _check_provider_availability(provider)
+            }
+
+        return {
+            "success": True,
+            "pricing": enhanced_pricing,
+            "currency": "USD",
+            "unit": "per hour",
+            "last_updated": datetime.now().isoformat()
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment pricing: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get pricing: {str(e)}")
+
+
+@router.get("/providers/compare")
+async def compare_providers(
+    gpu_type: str = Query(..., description="GPU type to compare"),
+    hours: float = Query(1.0, description="Number of hours for comparison"),
+    user = Depends(optional_auth)
+):
+    """
+    Compare costs across different providers for the same GPU type
+
+    Helps users choose the most cost-effective deployment option.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        comparisons = []
+
+        providers = ["modal", "runpod", "lambda_labs", "coreweave"]
+
+        for provider in providers:
+            try:
+                cost_estimate = billing_tracker.estimate_deployment_cost(
+                    provider=provider,
+                    gpu_type=gpu_type,
+                    gpu_count=1,
+                    estimated_hours=hours
+                )
+
+                comparisons.append({
+                    "provider": provider,
+                    "total_cost": cost_estimate["total_cost"],
+                    "hourly_rate": cost_estimate["compute_cost"] / hours if hours > 0 else 0,
+                    "breakdown": cost_estimate,
+                    "description": _get_provider_description(provider),
+                    "availability": _check_provider_availability(provider)
+                })
+            except Exception as e:
+                logger.warning(f"Could not get pricing for {provider}: {e}")
+
+        # Sort by total cost
+        comparisons.sort(key=lambda x: x["total_cost"])
+
+        return {
+            "success": True,
+            "comparison": {
+                "gpu_type": gpu_type,
+                "duration_hours": hours,
+                "providers": comparisons,
+                "cheapest": comparisons[0] if comparisons else None,
+                "savings": {
+                    "max_savings": comparisons[-1]["total_cost"] - comparisons[0]["total_cost"] if len(comparisons) > 1 else 0,
+                    "percentage": ((comparisons[-1]["total_cost"] - comparisons[0]["total_cost"]) / comparisons[-1]["total_cost"] * 100) if len(comparisons) > 1 and comparisons[-1]["total_cost"] > 0 else 0
+                }
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to compare providers: {e}")
+        raise HTTPException(status_code=500, detail=f"Provider comparison failed: {str(e)}")
+
+
+def _get_cost_recommendations(provider: str, gpu_type: str, cost_estimate: Dict[str, float]) -> List[str]:
+    """Generate cost optimization recommendations"""
+    recommendations = []
+
+    if cost_estimate["total_cost"] > 10.0:
+        recommendations.append("Consider using spot instances if available for significant savings")
+
+    if gpu_type in ["h100", "a100_80gb"]:
+        recommendations.append("High-end GPU selected - ensure workload requires this performance")
+
+    if provider == "modal":
+        recommendations.append("Modal offers automatic scaling - costs only incurred during active use")
+
+    if provider in ["runpod", "lambda_labs"]:
+        recommendations.append("Consider longer-term contracts for better rates on extended deployments")
+
+    return recommendations
+
+
+def _get_billing_recommendations(summary: Dict[str, Any]) -> List[str]:
+    """Generate billing optimization recommendations based on usage patterns"""
+    recommendations = []
+
+    if summary["total_cost"] > 100.0:
+        recommendations.append("High usage detected - consider reserved instances for cost savings")
+
+    # Analyze provider distribution
+    providers = summary.get("by_provider", {})
+    if len(providers) > 1:
+        costs = [(p, data["cost"]) for p, data in providers.items()]
+        costs.sort(key=lambda x: x[1])
+        if len(costs) > 1 and costs[-1][1] > costs[0][1] * 2:
+            recommendations.append(f"Consider migrating from {costs[-1][0]} to {costs[0][0]} for potential savings")
+
+    # Analyze GPU usage
+    gpu_types = summary.get("by_gpu_type", {})
+    if "h100" in gpu_types and gpu_types["h100"]["gpu_hours"] < 10:
+        recommendations.append("Low H100 usage - consider A100 for similar performance at lower cost")
+
+    return recommendations
+
+
+def _get_provider_description(provider: str) -> str:
+    """Get description for deployment provider"""
+    descriptions = {
+        "modal": "Serverless GPU platform with automatic scaling and pay-per-use billing",
+        "triton_local": "Local deployment using your own hardware with electricity costs",
+        "runpod": "Cloud GPU rental with competitive pricing and flexible instances",
+        "lambda_labs": "Professional GPU cloud with reliable infrastructure and support",
+        "coreweave": "High-performance GPU infrastructure optimized for AI workloads"
+    }
+    return descriptions.get(provider, "Unknown provider")
+
+
+def _get_provider_recommendations(provider: str) -> List[str]:
+    """Get recommendations for when to use each provider"""
+    recommendations = {
+        "modal": ["Development and testing", "Variable workloads", "Automatic scaling needs"],
+        "triton_local": ["Long-term deployments", "Data privacy requirements", "Cost optimization"],
+        "runpod": ["Budget-conscious deployments", "Flexible scaling", "Spot instance savings"],
+        "lambda_labs": ["Production workloads", "Reliable performance", "Enterprise support"],
+        "coreweave": ["High-performance requirements", "Large-scale deployments", "Bare metal access"]
+    }
+    return recommendations.get(provider, [])
+
+
+def _check_provider_availability(provider: str) -> str:
+    """Check if provider is currently available"""
+    # This would implement actual availability checking
+    # For now, return static status
+    availability = {
+        "modal": "Available",
+        "triton_local": "Available (requires local setup)",
+        "runpod": "Available",
+        "lambda_labs": "Available",
+        "coreweave": "Available (requires signup)"
+    }
+    return availability.get(provider, "Unknown")
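
As a rough illustration of how these new billing routes could be exercised from a client, here is a minimal sketch using the requests library. The base URL, any API mount prefix, and the auth header are assumptions not shown in this diff; the request fields and response keys follow CostEstimationRequest and the route handlers above, and "a100_80gb" is one of the GPU types referenced in _get_cost_recommendations.

# Hypothetical client-side sketch; BASE and auth are placeholders, not part of the package.
import requests

BASE = "http://localhost:8000"  # assumed address of a running isa_model API server

# Estimate the cost of a single-GPU deployment on Modal for two hours.
resp = requests.post(
    f"{BASE}/deployment/estimate-cost",
    json={
        "provider": "modal",
        "gpu_type": "a100_80gb",
        "gpu_count": 1,
        "estimated_hours": 2.0,
        "operation_type": "deployment",
    },
)
print(resp.json()["estimation"]["cost_breakdown"])

# Compare providers for the same GPU type and duration; cheapest option comes first.
resp = requests.get(
    f"{BASE}/deployment/providers/compare",
    params={"gpu_type": "a100_80gb", "hours": 2.0},
)
print(resp.json()["comparison"]["cheapest"])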
isa_model/serving/api/routes/deployments.py
@@ -4,15 +4,17 @@ Deployments API Routes
 Handles automated HuggingFace model deployment to Modal
 """
 
-from fastapi import APIRouter, HTTPException, BackgroundTasks
+from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
 from pydantic import BaseModel
 from typing import Optional, List, Dict, Any
 import logging
 import asyncio
 import json
+import time
+from datetime import datetime
 from pathlib import Path
 
-from isa_model.deployment.services.auto_hf_modal_deployer import HuggingFaceModalDeployer
+from isa_model.deployment.modal.deployer import ModalDeployer as HuggingFaceModalDeployer
 
 logger = logging.getLogger(__name__)
 
@@ -233,6 +235,140 @@ async def get_deployment(deployment_id: str):
         logger.error(f"Failed to get deployment {deployment_id}: {e}")
         raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
 
+@router.get("/{deployment_id}/status")
+async def get_deployment_status(deployment_id: str, request: Request):
+    """
+    Get real-time deployment status and monitoring information with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        # Initialize deployment manager
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Get deployment status
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        return {
+            "success": True,
+            "deployment_status": status_info
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment status {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}")
+
+@router.get("/{deployment_id}/monitoring")
+async def get_deployment_monitoring(deployment_id: str, request: Request):
+    """
+    Get detailed monitoring metrics for Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        if status_info.get("status") == "not_found":
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        # Extract detailed monitoring data
+        monitoring_data = status_info.get("monitoring", {})
+
+        return {
+            "success": True,
+            "deployment_id": deployment_id,
+            "monitoring": {
+                "health_check": monitoring_data.get("health_check"),
+                "resource_usage": monitoring_data.get("resource_usage"),
+                "request_metrics": monitoring_data.get("request_metrics"),
+                "cost_tracking": monitoring_data.get("cost_tracking"),
+                "last_updated": datetime.now().isoformat()
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get monitoring data {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get monitoring data: {str(e)}")
+
+@router.post("/{deployment_id}/restart")
+async def restart_deployment(deployment_id: str, request: Request):
+    """
+    Restart a Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Check if deployment exists and user has access
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Update status to restarting
+        await manager.update_deployment_status(deployment_id, "restarting")
+
+        # TODO: Implement actual Modal service restart
+        # For now, simulate restart process
+        await asyncio.sleep(1)
+
+        # Update status to running
+        await manager.update_deployment_status(deployment_id, "running")
+
+        return {
+            "success": True,
+            "message": f"Deployment {deployment_id} restarted successfully",
+            "deployment_id": deployment_id,
+            "status": "running"
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to restart deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to restart deployment: {str(e)}")
+
 @router.delete("/{deployment_id}")
 async def cancel_deployment(deployment_id: str):
     """