isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,315 @@
|
|
1
|
+
"""
|
2
|
+
Deployment Billing API Routes
|
3
|
+
|
4
|
+
API endpoints for deployment cost estimation, tracking, and billing information.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from fastapi import APIRouter, HTTPException, Query, Depends
|
8
|
+
from typing import Dict, Any, Optional, List
|
9
|
+
from datetime import datetime, timedelta
|
10
|
+
import logging
|
11
|
+
from pydantic import BaseModel
|
12
|
+
|
13
|
+
from ..auth import optional_auth
|
14
|
+
|
15
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router that collects every endpoint defined below; each route path is
# prefixed with "/deployment" and tagged "deployment-billing".
router = APIRouter(prefix="/deployment", tags=["deployment-billing"])
|
17
|
+
|
18
|
+
|
19
|
+
class CostEstimationRequest(BaseModel):
    """Request body accepted by the ``/estimate-cost`` route.

    Every field is forwarded as a keyword argument of the same name to the
    billing tracker's ``estimate_deployment_cost`` call.
    """
    # Name of the deployment provider to price (required).
    provider: str
    # GPU type identifier to price (required).
    gpu_type: str
    # Number of GPUs; defaults to a single GPU.
    gpu_count: int = 1
    # Expected run time in hours. The route also divides the compute cost by
    # this value to report an hourly rate (0 is reported when it is not > 0).
    estimated_hours: float = 1.0
    # Passed through unchanged as ``operation_type``; its accepted values are
    # defined by the billing tracker, not in this module.
    operation_type: str = "deployment"
|
26
|
+
|
27
|
+
|
28
|
+
class DeploymentBillingQuery(BaseModel):
    """Optional filter set for deployment billing queries.

    The fields carry the same names as the query parameters of the
    ``/billing/summary`` route.

    NOTE(review): no route visible in this module references this model; the
    summary endpoint declares the same filters as individual ``Query``
    parameters instead. Confirm whether it is used elsewhere.
    """
    # Start of the reporting period as a string (the summary route expects ISO format).
    start_date: Optional[str] = None
    # End of the reporting period as a string (the summary route expects ISO format).
    end_date: Optional[str] = None
    # Restrict results to one provider.
    provider: Optional[str] = None
    # Restrict results to one GPU type.
    gpu_type: Optional[str] = None
    # Restrict results to one model ID.
    model_id: Optional[str] = None
|
35
|
+
|
36
|
+
|
37
|
+
@router.post("/estimate-cost")
async def estimate_deployment_cost(
    request: CostEstimationRequest,
    user=Depends(optional_auth)
):
    """
    Price a deployment before it is started.

    Asks the deployment billing tracker for a cost estimate using the
    provider, GPU type, GPU count, duration and operation type from the
    request, then returns that estimate together with a derived hourly rate
    and a list of cost recommendations.

    The ``user`` dependency is resolved through ``optional_auth`` but is not
    otherwise used by this handler.

    Raises:
        HTTPException: status 500 if anything fails while building the estimate.
    """
    try:
        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker

        tracker = get_deployment_billing_tracker()
        breakdown = tracker.estimate_deployment_cost(
            provider=request.provider,
            gpu_type=request.gpu_type,
            gpu_count=request.gpu_count,
            estimated_hours=request.estimated_hours,
            operation_type=request.operation_type
        )

        # Derive the per-hour figure; a non-positive duration would make the
        # division meaningless (or fail), so report 0 in that case.
        if request.estimated_hours > 0:
            per_hour = breakdown["compute_cost"] / request.estimated_hours
        else:
            per_hour = 0

        estimation = {
            "provider": request.provider,
            "gpu_type": request.gpu_type,
            "gpu_count": request.gpu_count,
            "estimated_hours": request.estimated_hours,
            "cost_breakdown": breakdown,
            "hourly_rate": round(per_hour, 6),
            "recommendations": _get_cost_recommendations(request.provider, request.gpu_type, breakdown)
        }
        return {"success": True, "estimation": estimation}

    except Exception as exc:
        logger.error(f"Failed to estimate deployment cost: {exc}")
        raise HTTPException(status_code=500, detail=f"Cost estimation failed: {str(exc)}")
|
78
|
+
|
79
|
+
|
80
|
+
def _parse_iso_datetime(value: Optional[str], field_name: str) -> Optional[datetime]:
    """Parse an optional ISO-format query value, rejecting bad input with HTTP 400.

    Args:
        value: Raw query string value; ``None`` or an empty string means "not supplied".
        field_name: Name of the query parameter, used in the error message.

    Returns:
        The parsed ``datetime``, or ``None`` when no value was supplied.

    Raises:
        HTTPException: status 400 when the value is not a valid ISO datetime.
    """
    if not value:
        return None
    try:
        # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
        # on, so rewrite it as an explicit UTC offset first.
        return datetime.fromisoformat(value.replace('Z', '+00:00'))
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid {field_name}: expected ISO format, got {value!r}"
        ) from exc


@router.get("/billing/summary")
async def get_deployment_billing_summary(
    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
    provider: Optional[str] = Query(None, description="Filter by provider"),
    gpu_type: Optional[str] = Query(None, description="Filter by GPU type"),
    model_id: Optional[str] = Query(None, description="Filter by model ID"),
    user = Depends(optional_auth)
):
    """
    Get deployment billing summary with optional filters

    Fetches the deployment summary from the billing tracker, filtered by
    period, provider and GPU type. When ``model_id`` is given, that model's
    usage summary is fetched as well; only the model ID is passed to that
    lookup, so the other filters are not applied to it here.

    The response echoes the filters exactly as received and adds billing
    recommendations derived from the deployment summary.

    Raises:
        HTTPException: status 400 when ``start_date`` or ``end_date`` is not a
            valid ISO datetime; status 500 for any failure while building
            the summary.
    """
    # Validate caller-supplied dates before entering the generic handler below,
    # so malformed input is reported as a client error (400) rather than being
    # caught by ``except Exception`` and surfaced as a server error (500).
    start_dt = _parse_iso_datetime(start_date, "start_date")
    end_dt = _parse_iso_datetime(end_date, "end_date")

    try:
        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker

        billing_tracker = get_deployment_billing_tracker()

        # Get deployment summary
        deployment_summary = billing_tracker.get_deployment_summary(
            start_date=start_dt,
            end_date=end_dt,
            provider=provider,
            gpu_type=gpu_type
        )

        # If model_id filter is specified, get model-specific data
        model_summary = None
        if model_id:
            model_summary = billing_tracker.get_model_usage_summary(model_id)

        return {
            "success": True,
            "filters": {
                "start_date": start_date,
                "end_date": end_date,
                "provider": provider,
                "gpu_type": gpu_type,
                "model_id": model_id
            },
            "deployment_summary": deployment_summary,
            "model_summary": model_summary,
            "recommendations": _get_billing_recommendations(deployment_summary)
        }

    except Exception as e:
        logger.error(f"Failed to get deployment billing summary: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get billing summary: {str(e)}")
|
137
|
+
|
138
|
+
|
139
|
+
@router.get("/pricing")
async def get_deployment_pricing(
    user=Depends(optional_auth)
):
    """
    Return the billing tracker's pricing table, enriched per provider.

    For every provider key in the tracker's ``pricing_data`` the response
    carries the raw pricing plus a description, a "best for" list and an
    availability label taken from this module's static helper tables.
    ``last_updated`` is the time the response was generated
    (``datetime.now()``), not a timestamp read from the pricing data.

    Raises:
        HTTPException: status 500 if the pricing data cannot be assembled.
    """
    try:
        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker

        tracker = get_deployment_billing_tracker()

        # One enriched entry per provider found in the tracker's pricing table.
        enriched = {
            name: {
                "pricing": rates,
                "description": _get_provider_description(name),
                "best_for": _get_provider_recommendations(name),
                "availability": _check_provider_availability(name)
            }
            for name, rates in tracker.pricing_data.items()
        }

        return {
            "success": True,
            "pricing": enriched,
            "currency": "USD",
            "unit": "per hour",
            "last_updated": datetime.now().isoformat()
        }

    except Exception as exc:
        logger.error(f"Failed to get deployment pricing: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get pricing: {str(exc)}")
|
175
|
+
|
176
|
+
|
177
|
+
@router.get("/providers/compare")
async def compare_providers(
    gpu_type: str = Query(..., description="GPU type to compare"),
    hours: float = Query(1.0, description="Number of hours for comparison"),
    user=Depends(optional_auth)
):
    """
    Rank a fixed set of providers by estimated cost for one GPU type.

    Requests a single-GPU estimate for ``hours`` hours from each of "modal",
    "runpod", "lambda_labs" and "coreweave". A provider whose estimate fails
    is logged as a warning and left out of the result. The remaining entries
    are ordered by ascending total cost, and the response reports the
    cheapest entry plus the absolute and percentage gap between the cheapest
    and the most expensive one.

    Raises:
        HTTPException: status 500 if the comparison as a whole fails.
    """
    try:
        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker

        tracker = get_deployment_billing_tracker()
        rows = []

        for candidate in ["modal", "runpod", "lambda_labs", "coreweave"]:
            try:
                estimate = tracker.estimate_deployment_cost(
                    provider=candidate,
                    gpu_type=gpu_type,
                    gpu_count=1,
                    estimated_hours=hours
                )

                total = estimate["total_cost"]
                # A non-positive duration cannot yield a meaningful rate.
                if hours > 0:
                    per_hour = estimate["compute_cost"] / hours
                else:
                    per_hour = 0

                rows.append({
                    "provider": candidate,
                    "total_cost": total,
                    "hourly_rate": per_hour,
                    "breakdown": estimate,
                    "description": _get_provider_description(candidate),
                    "availability": _check_provider_availability(candidate)
                })
            except Exception as exc:
                # Best effort: one failing provider must not sink the whole comparison.
                logger.warning(f"Could not get pricing for {candidate}: {exc}")

        # Cheapest first.
        ranked = sorted(rows, key=lambda row: row["total_cost"])

        cheapest = ranked[0] if ranked else None

        # Savings are only defined when there are at least two providers to compare.
        max_savings = 0
        percentage = 0
        if len(ranked) > 1:
            lowest = ranked[0]["total_cost"]
            highest = ranked[-1]["total_cost"]
            max_savings = highest - lowest
            if highest > 0:
                percentage = (highest - lowest) / highest * 100

        return {
            "success": True,
            "comparison": {
                "gpu_type": gpu_type,
                "duration_hours": hours,
                "providers": ranked,
                "cheapest": cheapest,
                "savings": {
                    "max_savings": max_savings,
                    "percentage": percentage
                }
            }
        }

    except Exception as exc:
        logger.error(f"Failed to compare providers: {exc}")
        raise HTTPException(status_code=500, detail=f"Provider comparison failed: {str(exc)}")
|
236
|
+
|
237
|
+
|
238
|
+
def _get_cost_recommendations(provider: str, gpu_type: str, cost_estimate: Dict[str, float]) -> List[str]:
|
239
|
+
"""Generate cost optimization recommendations"""
|
240
|
+
recommendations = []
|
241
|
+
|
242
|
+
if cost_estimate["total_cost"] > 10.0:
|
243
|
+
recommendations.append("Consider using spot instances if available for significant savings")
|
244
|
+
|
245
|
+
if gpu_type in ["h100", "a100_80gb"]:
|
246
|
+
recommendations.append("High-end GPU selected - ensure workload requires this performance")
|
247
|
+
|
248
|
+
if provider == "modal":
|
249
|
+
recommendations.append("Modal offers automatic scaling - costs only incurred during active use")
|
250
|
+
|
251
|
+
if provider in ["runpod", "lambda_labs"]:
|
252
|
+
recommendations.append("Consider longer-term contracts for better rates on extended deployments")
|
253
|
+
|
254
|
+
return recommendations
|
255
|
+
|
256
|
+
|
257
|
+
def _get_billing_recommendations(summary: Dict[str, Any]) -> List[str]:
|
258
|
+
"""Generate billing optimization recommendations based on usage patterns"""
|
259
|
+
recommendations = []
|
260
|
+
|
261
|
+
if summary["total_cost"] > 100.0:
|
262
|
+
recommendations.append("High usage detected - consider reserved instances for cost savings")
|
263
|
+
|
264
|
+
# Analyze provider distribution
|
265
|
+
providers = summary.get("by_provider", {})
|
266
|
+
if len(providers) > 1:
|
267
|
+
costs = [(p, data["cost"]) for p, data in providers.items()]
|
268
|
+
costs.sort(key=lambda x: x[1])
|
269
|
+
if len(costs) > 1 and costs[-1][1] > costs[0][1] * 2:
|
270
|
+
recommendations.append(f"Consider migrating from {costs[-1][0]} to {costs[0][0]} for potential savings")
|
271
|
+
|
272
|
+
# Analyze GPU usage
|
273
|
+
gpu_types = summary.get("by_gpu_type", {})
|
274
|
+
if "h100" in gpu_types and gpu_types["h100"]["gpu_hours"] < 10:
|
275
|
+
recommendations.append("Low H100 usage - consider A100 for similar performance at lower cost")
|
276
|
+
|
277
|
+
return recommendations
|
278
|
+
|
279
|
+
|
280
|
+
def _get_provider_description(provider: str) -> str:
|
281
|
+
"""Get description for deployment provider"""
|
282
|
+
descriptions = {
|
283
|
+
"modal": "Serverless GPU platform with automatic scaling and pay-per-use billing",
|
284
|
+
"triton_local": "Local deployment using your own hardware with electricity costs",
|
285
|
+
"runpod": "Cloud GPU rental with competitive pricing and flexible instances",
|
286
|
+
"lambda_labs": "Professional GPU cloud with reliable infrastructure and support",
|
287
|
+
"coreweave": "High-performance GPU infrastructure optimized for AI workloads"
|
288
|
+
}
|
289
|
+
return descriptions.get(provider, "Unknown provider")
|
290
|
+
|
291
|
+
|
292
|
+
def _get_provider_recommendations(provider: str) -> List[str]:
|
293
|
+
"""Get recommendations for when to use each provider"""
|
294
|
+
recommendations = {
|
295
|
+
"modal": ["Development and testing", "Variable workloads", "Automatic scaling needs"],
|
296
|
+
"triton_local": ["Long-term deployments", "Data privacy requirements", "Cost optimization"],
|
297
|
+
"runpod": ["Budget-conscious deployments", "Flexible scaling", "Spot instance savings"],
|
298
|
+
"lambda_labs": ["Production workloads", "Reliable performance", "Enterprise support"],
|
299
|
+
"coreweave": ["High-performance requirements", "Large-scale deployments", "Bare metal access"]
|
300
|
+
}
|
301
|
+
return recommendations.get(provider, [])
|
302
|
+
|
303
|
+
|
304
|
+
def _check_provider_availability(provider: str) -> str:
|
305
|
+
"""Check if provider is currently available"""
|
306
|
+
# This would implement actual availability checking
|
307
|
+
# For now, return static status
|
308
|
+
availability = {
|
309
|
+
"modal": "Available",
|
310
|
+
"triton_local": "Available (requires local setup)",
|
311
|
+
"runpod": "Available",
|
312
|
+
"lambda_labs": "Available",
|
313
|
+
"coreweave": "Available (requires signup)"
|
314
|
+
}
|
315
|
+
return availability.get(provider, "Unknown")
|