isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/deployment_billing.py
@@ -0,0 +1,315 @@
+ """
+ Deployment Billing API Routes
+
+ API endpoints for deployment cost estimation, tracking, and billing information.
+ """
+
+ from fastapi import APIRouter, HTTPException, Query, Depends
+ from typing import Dict, Any, Optional, List
+ from datetime import datetime, timedelta
+ import logging
+ from pydantic import BaseModel
+
+ from ..auth import optional_auth
+
+ logger = logging.getLogger(__name__)
+ router = APIRouter(prefix="/deployment", tags=["deployment-billing"])
+
+
+ class CostEstimationRequest(BaseModel):
+     """Request model for deployment cost estimation"""
+     provider: str
+     gpu_type: str
+     gpu_count: int = 1
+     estimated_hours: float = 1.0
+     operation_type: str = "deployment"
+
+
+ class DeploymentBillingQuery(BaseModel):
+     """Query parameters for deployment billing"""
+     start_date: Optional[str] = None
+     end_date: Optional[str] = None
+     provider: Optional[str] = None
+     gpu_type: Optional[str] = None
+     model_id: Optional[str] = None
+
+
+ @router.post("/estimate-cost")
+ async def estimate_deployment_cost(
+     request: CostEstimationRequest,
+     user = Depends(optional_auth)
+ ):
+     """
+     Estimate deployment costs before starting deployment
+
+     Returns cost breakdown for specified provider, GPU type, and duration.
+     """
+     try:
+         from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+         billing_tracker = get_deployment_billing_tracker()
+         cost_estimate = billing_tracker.estimate_deployment_cost(
+             provider=request.provider,
+             gpu_type=request.gpu_type,
+             gpu_count=request.gpu_count,
+             estimated_hours=request.estimated_hours,
+             operation_type=request.operation_type
+         )
+
+         # Add additional cost breakdown details
+         hourly_rate = cost_estimate["compute_cost"] / request.estimated_hours if request.estimated_hours > 0 else 0
+
+         return {
+             "success": True,
+             "estimation": {
+                 "provider": request.provider,
+                 "gpu_type": request.gpu_type,
+                 "gpu_count": request.gpu_count,
+                 "estimated_hours": request.estimated_hours,
+                 "cost_breakdown": cost_estimate,
+                 "hourly_rate": round(hourly_rate, 6),
+                 "recommendations": _get_cost_recommendations(request.provider, request.gpu_type, cost_estimate)
+             }
+         }
+
+     except Exception as e:
+         logger.error(f"Failed to estimate deployment cost: {e}")
+         raise HTTPException(status_code=500, detail=f"Cost estimation failed: {str(e)}")
+
+
+ @router.get("/billing/summary")
+ async def get_deployment_billing_summary(
+     start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
+     end_date: Optional[str] = Query(None, description="End date (ISO format)"),
+     provider: Optional[str] = Query(None, description="Filter by provider"),
+     gpu_type: Optional[str] = Query(None, description="Filter by GPU type"),
+     model_id: Optional[str] = Query(None, description="Filter by model ID"),
+     user = Depends(optional_auth)
+ ):
+     """
+     Get deployment billing summary with optional filters
+
+     Returns comprehensive billing information for deployments within specified period.
+     """
+     try:
+         from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+         # Parse dates
+         start_dt = None
+         end_dt = None
+         if start_date:
+             start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+         if end_date:
+             end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+
+         billing_tracker = get_deployment_billing_tracker()
+
+         # Get deployment summary
+         deployment_summary = billing_tracker.get_deployment_summary(
+             start_date=start_dt,
+             end_date=end_dt,
+             provider=provider,
+             gpu_type=gpu_type
+         )
+
+         # If model_id filter is specified, get model-specific data
+         model_summary = None
+         if model_id:
+             model_summary = billing_tracker.get_model_usage_summary(model_id)
+
+         return {
+             "success": True,
+             "filters": {
+                 "start_date": start_date,
+                 "end_date": end_date,
+                 "provider": provider,
+                 "gpu_type": gpu_type,
+                 "model_id": model_id
+             },
+             "deployment_summary": deployment_summary,
+             "model_summary": model_summary,
+             "recommendations": _get_billing_recommendations(deployment_summary)
+         }
+
+     except Exception as e:
+         logger.error(f"Failed to get deployment billing summary: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to get billing summary: {str(e)}")
+
+
+ @router.get("/pricing")
+ async def get_deployment_pricing(
+     user = Depends(optional_auth)
+ ):
+     """
+     Get current deployment pricing for all providers and GPU types
+
+     Returns up-to-date pricing information for cost planning.
+     """
+     try:
+         from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+         billing_tracker = get_deployment_billing_tracker()
+         pricing_data = billing_tracker.pricing_data
+
+         # Add provider descriptions and recommendations
+         enhanced_pricing = {}
+         for provider, pricing in pricing_data.items():
+             enhanced_pricing[provider] = {
+                 "pricing": pricing,
+                 "description": _get_provider_description(provider),
+                 "best_for": _get_provider_recommendations(provider),
+                 "availability": _check_provider_availability(provider)
+             }
+
+         return {
+             "success": True,
+             "pricing": enhanced_pricing,
+             "currency": "USD",
+             "unit": "per hour",
+             "last_updated": datetime.now().isoformat()
+         }
+
+     except Exception as e:
+         logger.error(f"Failed to get deployment pricing: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to get pricing: {str(e)}")
+
+
+ @router.get("/providers/compare")
+ async def compare_providers(
+     gpu_type: str = Query(..., description="GPU type to compare"),
+     hours: float = Query(1.0, description="Number of hours for comparison"),
+     user = Depends(optional_auth)
+ ):
+     """
+     Compare costs across different providers for the same GPU type
+
+     Helps users choose the most cost-effective deployment option.
+     """
+     try:
+         from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+         billing_tracker = get_deployment_billing_tracker()
+         comparisons = []
+
+         providers = ["modal", "runpod", "lambda_labs", "coreweave"]
+
+         for provider in providers:
+             try:
+                 cost_estimate = billing_tracker.estimate_deployment_cost(
+                     provider=provider,
+                     gpu_type=gpu_type,
+                     gpu_count=1,
+                     estimated_hours=hours
+                 )
+
+                 comparisons.append({
+                     "provider": provider,
+                     "total_cost": cost_estimate["total_cost"],
+                     "hourly_rate": cost_estimate["compute_cost"] / hours if hours > 0 else 0,
+                     "breakdown": cost_estimate,
+                     "description": _get_provider_description(provider),
+                     "availability": _check_provider_availability(provider)
+                 })
+             except Exception as e:
+                 logger.warning(f"Could not get pricing for {provider}: {e}")
+
+         # Sort by total cost
+         comparisons.sort(key=lambda x: x["total_cost"])
+
+         return {
+             "success": True,
+             "comparison": {
+                 "gpu_type": gpu_type,
+                 "duration_hours": hours,
+                 "providers": comparisons,
+                 "cheapest": comparisons[0] if comparisons else None,
+                 "savings": {
+                     "max_savings": comparisons[-1]["total_cost"] - comparisons[0]["total_cost"] if len(comparisons) > 1 else 0,
+                     "percentage": ((comparisons[-1]["total_cost"] - comparisons[0]["total_cost"]) / comparisons[-1]["total_cost"] * 100) if len(comparisons) > 1 and comparisons[-1]["total_cost"] > 0 else 0
+                 }
+             }
+         }
+
+     except Exception as e:
+         logger.error(f"Failed to compare providers: {e}")
+         raise HTTPException(status_code=500, detail=f"Provider comparison failed: {str(e)}")
+
+
+ def _get_cost_recommendations(provider: str, gpu_type: str, cost_estimate: Dict[str, float]) -> List[str]:
+     """Generate cost optimization recommendations"""
+     recommendations = []
+
+     if cost_estimate["total_cost"] > 10.0:
+         recommendations.append("Consider using spot instances if available for significant savings")
+
+     if gpu_type in ["h100", "a100_80gb"]:
+         recommendations.append("High-end GPU selected - ensure workload requires this performance")
+
+     if provider == "modal":
+         recommendations.append("Modal offers automatic scaling - costs only incurred during active use")
+
+     if provider in ["runpod", "lambda_labs"]:
+         recommendations.append("Consider longer-term contracts for better rates on extended deployments")
+
+     return recommendations
+
+
+ def _get_billing_recommendations(summary: Dict[str, Any]) -> List[str]:
+     """Generate billing optimization recommendations based on usage patterns"""
+     recommendations = []
+
+     if summary["total_cost"] > 100.0:
+         recommendations.append("High usage detected - consider reserved instances for cost savings")
+
+     # Analyze provider distribution
+     providers = summary.get("by_provider", {})
+     if len(providers) > 1:
+         costs = [(p, data["cost"]) for p, data in providers.items()]
+         costs.sort(key=lambda x: x[1])
+         if len(costs) > 1 and costs[-1][1] > costs[0][1] * 2:
+             recommendations.append(f"Consider migrating from {costs[-1][0]} to {costs[0][0]} for potential savings")
+
+     # Analyze GPU usage
+     gpu_types = summary.get("by_gpu_type", {})
+     if "h100" in gpu_types and gpu_types["h100"]["gpu_hours"] < 10:
+         recommendations.append("Low H100 usage - consider A100 for similar performance at lower cost")
+
+     return recommendations
+
+
+ def _get_provider_description(provider: str) -> str:
+     """Get description for deployment provider"""
+     descriptions = {
+         "modal": "Serverless GPU platform with automatic scaling and pay-per-use billing",
+         "triton_local": "Local deployment using your own hardware with electricity costs",
+         "runpod": "Cloud GPU rental with competitive pricing and flexible instances",
+         "lambda_labs": "Professional GPU cloud with reliable infrastructure and support",
+         "coreweave": "High-performance GPU infrastructure optimized for AI workloads"
+     }
+     return descriptions.get(provider, "Unknown provider")
+
+
+ def _get_provider_recommendations(provider: str) -> List[str]:
+     """Get recommendations for when to use each provider"""
+     recommendations = {
+         "modal": ["Development and testing", "Variable workloads", "Automatic scaling needs"],
+         "triton_local": ["Long-term deployments", "Data privacy requirements", "Cost optimization"],
+         "runpod": ["Budget-conscious deployments", "Flexible scaling", "Spot instance savings"],
+         "lambda_labs": ["Production workloads", "Reliable performance", "Enterprise support"],
+         "coreweave": ["High-performance requirements", "Large-scale deployments", "Bare metal access"]
+     }
+     return recommendations.get(provider, [])
+
+
+ def _check_provider_availability(provider: str) -> str:
+     """Check if provider is currently available"""
+     # This would implement actual availability checking
+     # For now, return static status
+     availability = {
+         "modal": "Available",
+         "triton_local": "Available (requires local setup)",
+         "runpod": "Available",
+         "lambda_labs": "Available",
+         "coreweave": "Available (requires signup)"
+     }
+     return availability.get(provider, "Unknown")