isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/core/models/deployment_billing_tracker.py

@@ -0,0 +1,430 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Deployment Billing Tracker - Specialized billing for deployment and training operations
+
+Extends the core ModelBillingTracker with deployment-specific metrics:
+- GPU runtime hours
+- Instance type costs
+- Training epochs/steps billing
+- Deployment lifecycle costs
+"""
+
+from typing import Dict, List, Optional, Any, Union
+from datetime import datetime, timezone, timedelta
+from dataclasses import dataclass, asdict
+import json
+import logging
+from enum import Enum
+from .model_billing_tracker import ModelBillingTracker, ModelUsageRecord, ModelOperationType
+
+logger = logging.getLogger(__name__)
+
+class DeploymentProvider(Enum):
+    """Deployment providers"""
+    MODAL = "modal"
+    TRITON_LOCAL = "triton_local"
+    TRITON_CLOUD = "triton_cloud"
+    RUNPOD = "runpod"
+    LAMBDA_LABS = "lambda_labs"
+    COREWEAVE = "coreweave"
+
+class GPUType(Enum):
+    """GPU types for cost calculation"""
+    RTX_4090 = "rtx_4090"
+    RTX_A6000 = "rtx_a6000"
+    A100_40GB = "a100_40gb"
+    A100_80GB = "a100_80gb"
+    H100 = "h100"
+    T4 = "t4"
+    V100 = "v100"
+
+@dataclass
+class DeploymentUsageRecord(ModelUsageRecord):
+    """Extended usage record for deployment operations"""
+    # GPU/Infrastructure metrics
+    gpu_type: Optional[str] = None
+    gpu_count: Optional[int] = None
+    runtime_hours: Optional[float] = None
+    cpu_cores: Optional[int] = None
+    memory_gb: Optional[int] = None
+
+    # Training-specific metrics
+    training_epochs: Optional[int] = None
+    training_steps: Optional[int] = None
+    dataset_size: Optional[int] = None
+
+    # Deployment-specific metrics
+    deployment_duration_hours: Optional[float] = None
+    requests_served: Optional[int] = None
+    avg_latency_ms: Optional[float] = None
+
+    # Infrastructure costs
+    compute_cost_usd: Optional[float] = None
+    storage_cost_usd: Optional[float] = None
+    network_cost_usd: Optional[float] = None
+
+class DeploymentBillingTracker(ModelBillingTracker):
+    """
+    Specialized billing tracker for deployment and training operations
+
+    Extends ModelBillingTracker with deployment-specific cost calculations
+    and metrics tracking for GPU-based operations.
+    """
+
+    def __init__(self, model_registry=None, storage_path: Optional[str] = None):
+        super().__init__(model_registry, storage_path)
+
+        # Load pricing data for deployment providers
+        self.pricing_data = self._load_deployment_pricing()
+
+    def _load_deployment_pricing(self) -> Dict[str, Dict[str, float]]:
+        """Load pricing data for different deployment providers and GPU types"""
+        return {
+            "modal": {
+                "t4": 0.50,  # $/hour
+                "rtx_4090": 0.80,
+                "a100_40gb": 2.50,
+                "a100_80gb": 4.00,
+                "h100": 8.00,
+                "base_compute": 0.10  # $/hour base compute
+            },
+            "triton_local": {
+                "electricity": 0.12,  # $/kWh
+                "gpu_tdp": {
+                    "rtx_4090": 450,  # Watts
+                    "a100_40gb": 400,
+                    "a100_80gb": 400,
+                    "h100": 700
+                }
+            },
+            "runpod": {
+                "rtx_4090": 0.44,
+                "rtx_a6000": 0.79,
+                "a100_40gb": 1.69,
+                "a100_80gb": 2.89,
+                "h100": 4.89
+            },
+            "lambda_labs": {
+                "rtx_4090": 0.50,
+                "a100_40gb": 1.50,
+                "a100_80gb": 2.50,
+                "h100": 4.50
+            },
+            "coreweave": {
+                "rtx_4090": 0.57,
+                "a100_40gb": 2.06,
+                "a100_80gb": 2.23,
+                "h100": 4.76
+            }
+        }
+
+    def track_deployment_usage(
+        self,
+        model_id: str,
+        provider: Union[str, DeploymentProvider],
+        operation_type: Union[str, ModelOperationType],
+        service_type: str,
+        operation: str,
+
+        # GPU/Infrastructure metrics
+        gpu_type: Optional[Union[str, GPUType]] = None,
+        gpu_count: Optional[int] = None,
+        runtime_hours: Optional[float] = None,
+        cpu_cores: Optional[int] = None,
+        memory_gb: Optional[int] = None,
+
+        # Training-specific
+        training_epochs: Optional[int] = None,
+        training_steps: Optional[int] = None,
+        dataset_size: Optional[int] = None,
+
+        # Deployment-specific
+        deployment_duration_hours: Optional[float] = None,
+        requests_served: Optional[int] = None,
+        avg_latency_ms: Optional[float] = None,
+
+        # Standard billing
+        input_tokens: Optional[int] = None,
+        output_tokens: Optional[int] = None,
+        cost_usd: Optional[float] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> DeploymentUsageRecord:
+        """
+        Track deployment/training usage with specialized metrics
+
+        Args:
+            model_id: Model identifier
+            provider: Deployment provider
+            operation_type: Type of operation (training, deployment, inference)
+            service_type: Service type (llm, vision, etc.)
+            operation: Specific operation
+            gpu_type: Type of GPU used
+            gpu_count: Number of GPUs
+            runtime_hours: Hours of runtime
+            training_epochs: Number of training epochs
+            deployment_duration_hours: Hours deployment was active
+            ... (other parameters as documented)
+
+        Returns:
+            DeploymentUsageRecord with calculated costs
+        """
+        # Convert enums to strings
+        if isinstance(provider, DeploymentProvider):
+            provider = provider.value
+        if isinstance(operation_type, ModelOperationType):
+            operation_type = operation_type.value
+        if isinstance(gpu_type, GPUType):
+            gpu_type = gpu_type.value
+
+        # Calculate deployment-specific costs
+        if cost_usd is None:
+            cost_breakdown = self._calculate_deployment_cost(
+                provider, gpu_type, gpu_count, runtime_hours,
+                deployment_duration_hours, training_epochs, training_steps
+            )
+            cost_usd = cost_breakdown["total_cost"]
+            compute_cost = cost_breakdown["compute_cost"]
+            storage_cost = cost_breakdown["storage_cost"]
+            network_cost = cost_breakdown["network_cost"]
+        else:
+            compute_cost = cost_usd  # If provided, assume it's compute cost
+            storage_cost = 0.0
+            network_cost = 0.0
+
+        # Create deployment usage record
+        record = DeploymentUsageRecord(
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            model_id=model_id,
+            operation_type=operation_type,
+            provider=provider,
+            service_type=service_type,
+            operation=operation,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=(input_tokens or 0) + (output_tokens or 0) if input_tokens or output_tokens else None,
+            cost_usd=cost_usd,
+            metadata=metadata or {},
+
+            # Deployment-specific fields
+            gpu_type=gpu_type,
+            gpu_count=gpu_count,
+            runtime_hours=runtime_hours,
+            cpu_cores=cpu_cores,
+            memory_gb=memory_gb,
+            training_epochs=training_epochs,
+            training_steps=training_steps,
+            dataset_size=dataset_size,
+            deployment_duration_hours=deployment_duration_hours,
+            requests_served=requests_served,
+            avg_latency_ms=avg_latency_ms,
+            compute_cost_usd=compute_cost,
+            storage_cost_usd=storage_cost,
+            network_cost_usd=network_cost
+        )
+
+        # Add to records and save
+        self.usage_records.append(record)
+        self._save_data()
+
+        logger.info(f"Tracked deployment usage: {model_id} - {provider} - {gpu_type} - ${cost_usd:.4f}")
+        return record
+
+    def _calculate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: Optional[str],
+        gpu_count: Optional[int],
+        runtime_hours: Optional[float],
+        deployment_duration_hours: Optional[float],
+        training_epochs: Optional[int],
+        training_steps: Optional[int]
+    ) -> Dict[str, float]:
+        """Calculate deployment costs based on provider and usage"""
+
+        gpu_count = gpu_count or 1
+        runtime_hours = runtime_hours or deployment_duration_hours or 1.0
+
+        compute_cost = 0.0
+        storage_cost = 0.0
+        network_cost = 0.0
+
+        try:
+            if provider in self.pricing_data:
+                pricing = self.pricing_data[provider]
+
+                if provider == "modal":
+                    # Modal pricing: per-GPU hourly rate
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+                    else:
+                        compute_cost = pricing.get("base_compute", 0.10) * runtime_hours
+
+                elif provider == "triton_local":
+                    # Local deployment: electricity costs
+                    if gpu_type and gpu_type in pricing["gpu_tdp"]:
+                        power_watts = pricing["gpu_tdp"][gpu_type] * gpu_count
+                        kwh_used = (power_watts / 1000) * runtime_hours
+                        compute_cost = kwh_used * pricing["electricity"]
+
+                elif provider in ["runpod", "lambda_labs", "coreweave"]:
+                    # Cloud GPU providers: per-GPU hourly rates
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+
+            # Add storage costs (simplified)
+            storage_cost = runtime_hours * 0.01  # $0.01/hour for storage
+
+            # Add network costs for training (data transfer)
+            if training_epochs and training_epochs > 0:
+                network_cost = training_epochs * 0.05  # $0.05 per epoch for data
+
+        except Exception as e:
+            logger.error(f"Error calculating deployment cost: {e}")
+            compute_cost = 0.0
+
+        total_cost = compute_cost + storage_cost + network_cost
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "compute_cost": round(compute_cost, 6),
+            "storage_cost": round(storage_cost, 6),
+            "network_cost": round(network_cost, 6)
+        }
+
+    def estimate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: str,
+        gpu_count: int = 1,
+        estimated_hours: float = 1.0,
+        operation_type: str = "deployment"
+    ) -> Dict[str, float]:
+        """
+        Estimate deployment costs before starting deployment
+
+        Args:
+            provider: Deployment provider
+            gpu_type: GPU type to use
+            gpu_count: Number of GPUs
+            estimated_hours: Estimated runtime hours
+            operation_type: Type of operation
+
+        Returns:
+            Cost breakdown dictionary
+        """
+        return self._calculate_deployment_cost(
+            provider, gpu_type, gpu_count, estimated_hours,
+            estimated_hours, None, None
+        )
+
+    def get_deployment_summary(
+        self,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        provider: Optional[str] = None,
+        gpu_type: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Get deployment cost summary with filters"""
+
+        # Filter records
+        filtered_records = []
+        for record in self.usage_records:
+            # Check if it's a deployment record
+            if not isinstance(record, DeploymentUsageRecord):
+                continue
+
+            # Apply filters
+            if start_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) < start_date:
+                continue
+            if end_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) > end_date:
+                continue
+            if provider and record.provider != provider:
+                continue
+            if gpu_type and record.gpu_type != gpu_type:
+                continue
+
+            filtered_records.append(record)
+
+        if not filtered_records:
+            return {
+                "total_cost": 0.0,
+                "total_gpu_hours": 0.0,
+                "deployments": 0,
+                "by_provider": {},
+                "by_gpu_type": {},
+                "by_operation": {}
+            }
+
+        # Calculate summary
+        total_cost = sum(record.cost_usd or 0 for record in filtered_records)
+        total_gpu_hours = sum((record.runtime_hours or 0) * (record.gpu_count or 1) for record in filtered_records)
+        total_deployments = len(filtered_records)
+
+        # Group by provider
+        by_provider = {}
+        for record in filtered_records:
+            if record.provider not in by_provider:
+                by_provider[record.provider] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_provider[record.provider]["cost"] += record.cost_usd or 0
+            by_provider[record.provider]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_provider[record.provider]["count"] += 1
+
+        # Group by GPU type
+        by_gpu_type = {}
+        for record in filtered_records:
+            gpu = record.gpu_type or "unknown"
+            if gpu not in by_gpu_type:
+                by_gpu_type[gpu] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_gpu_type[gpu]["cost"] += record.cost_usd or 0
+            by_gpu_type[gpu]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_gpu_type[gpu]["count"] += 1
+
+        # Group by operation
+        by_operation = {}
+        for record in filtered_records:
+            op = record.operation_type
+            if op not in by_operation:
+                by_operation[op] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_operation[op]["cost"] += record.cost_usd or 0
+            by_operation[op]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_operation[op]["count"] += 1
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "total_gpu_hours": round(total_gpu_hours, 2),
+            "deployments": total_deployments,
+            "avg_cost_per_deployment": round(total_cost / total_deployments, 6) if total_deployments > 0 else 0,
+            "avg_cost_per_gpu_hour": round(total_cost / total_gpu_hours, 6) if total_gpu_hours > 0 else 0,
+            "by_provider": by_provider,
+            "by_gpu_type": by_gpu_type,
+            "by_operation": by_operation,
+            "period": {
+                "start": filtered_records[0].timestamp if filtered_records else None,
+                "end": filtered_records[-1].timestamp if filtered_records else None
+            }
+        }
+
+# Global deployment billing tracker instance
+_global_deployment_tracker: Optional[DeploymentBillingTracker] = None
+
+def get_deployment_billing_tracker() -> DeploymentBillingTracker:
+    """Get the global deployment billing tracker instance"""
+    global _global_deployment_tracker
+    if _global_deployment_tracker is None:
+        try:
+            from .model_repo import ModelRegistry
+            registry = ModelRegistry()
+            _global_deployment_tracker = DeploymentBillingTracker(model_registry=registry)
+        except Exception:
+            _global_deployment_tracker = DeploymentBillingTracker()
+    return _global_deployment_tracker
+
+def track_deployment_usage(**kwargs) -> DeploymentUsageRecord:
+    """Convenience function to track deployment usage"""
+    return get_deployment_billing_tracker().track_deployment_usage(**kwargs)
+
+def estimate_deployment_cost(**kwargs) -> Dict[str, float]:
+    """Convenience function to estimate deployment cost"""
+    return get_deployment_billing_tracker().estimate_deployment_cost(**kwargs)
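
For orientation, a minimal usage sketch of the new tracker follows. It assumes the import path matches this file's location in the diff and that the `ModelBillingTracker` base constructs cleanly with its default `model_registry=None` / `storage_path=None` arguments; the expected numbers in the comments follow directly from the pricing table in `_load_deployment_pricing()` above.

```python
# Sketch only: the import path and default construction are assumptions,
# not verified against the published wheel.
from isa_model.core.models.deployment_billing_tracker import (
    DeploymentBillingTracker,
    GPUType,
)

tracker = DeploymentBillingTracker()

# Modal, 2x A100 40GB for 3 hours:
#   compute = 2.50 $/h * 2 GPUs * 3 h = 15.00
#   storage = 3 h * 0.01 $/h          =  0.03
print(tracker.estimate_deployment_cost(
    provider="modal", gpu_type="a100_40gb", gpu_count=2, estimated_hours=3.0
))
# -> {'total_cost': 15.03, 'compute_cost': 15.0, 'storage_cost': 0.03, 'network_cost': 0.0}

# Local Triton, one RTX 4090 for 2 hours, billed as electricity:
#   (450 W / 1000) * 2 h = 0.9 kWh; 0.9 kWh * 0.12 $/kWh = 0.108
print(tracker.estimate_deployment_cost(
    provider="triton_local", gpu_type=GPUType.RTX_4090.value, estimated_hours=2.0
))
# -> {'total_cost': 0.128, 'compute_cost': 0.108, 'storage_cost': 0.02, 'network_cost': 0.0}
```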
isa_model/core/models/model_manager.py

@@ -37,20 +37,43 @@ class ModelManager:
         self.config_manager = config_manager or ConfigManager()
 
     def get_model_pricing(self, provider: str, model_name: str) -> Dict[str, float]:
-        """
+        """Get model pricing information (from the database)"""
         try:
-
-
-
-
-
-
-
-
-
-
+            if not self.registry or not hasattr(self.registry, 'supabase_client'):
+                logger.warning("No database connection for pricing lookup")
+                return {"input": 0.0, "output": 0.0}
+
+            # Query the unified pricing table
+            result = self.registry.supabase_client.table('current_model_pricing').select('*').eq(
+                'model_id', model_name
+            ).eq('provider', provider).execute()
+
+            if result.data and len(result.data) > 0:
+                pricing = result.data[0]
+
+                # Convert to a unified format based on the pricing model
+                pricing_model = pricing.get('pricing_model')
+                unit_size = pricing.get('unit_size', 1)
+
+                if pricing_model == 'per_token':
+                    # Convert to per-token cost
+                    input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
+                    output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
+                elif pricing_model in ['per_character', 'per_minute', 'per_request']:
+                    # These are billed in their original units
+                    input_cost = float(pricing.get('input_cost_per_unit', 0))
+                    output_cost = float(pricing.get('output_cost_per_unit', 0))
+                    # If there is a base per-request fee, add it to the input cost
+                    if pricing.get('base_cost_per_request', 0) > 0:
+                        input_cost += float(pricing.get('base_cost_per_request', 0))
+                else:
+                    input_cost = output_cost = 0.0
+
+                return {"input": input_cost, "output": output_cost}
+
         except Exception as e:
             logger.warning(f"Failed to get pricing for {provider}/{model_name}: {e}")
+
         return {"input": 0.0, "output": 0.0}
 
     def calculate_cost(self, provider: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
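
The unit-conversion branches above are easiest to check in isolation. The sketch below restates that logic as a standalone helper with a hypothetical `current_model_pricing` row; the helper name `to_unified_pricing` and the row values are illustrative, not part of the package.

```python
# Standalone restatement of the conversion branches above (illustrative only).
def to_unified_pricing(pricing: dict) -> dict:
    pricing_model = pricing.get('pricing_model')
    unit_size = pricing.get('unit_size', 1)

    if pricing_model == 'per_token':
        # Scale per-unit cost by the unit size, as in get_model_pricing()
        input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
        output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
    elif pricing_model in ['per_character', 'per_minute', 'per_request']:
        input_cost = float(pricing.get('input_cost_per_unit', 0))
        output_cost = float(pricing.get('output_cost_per_unit', 0))
        # Fold any flat per-request fee into the input side
        if pricing.get('base_cost_per_request', 0) > 0:
            input_cost += float(pricing.get('base_cost_per_request', 0))
    else:
        input_cost = output_cost = 0.0
    return {"input": input_cost, "output": output_cost}

# Hypothetical per-token row with unit_size 1
row = {'pricing_model': 'per_token', 'input_cost_per_unit': 2.5e-06,
       'output_cost_per_unit': 1e-05, 'unit_size': 1}
print(to_unified_pricing(row))  # {'input': 2.5e-06, 'output': 1e-05}
```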
@@ -112,21 +135,21 @@ class ModelManager:
             logger.info(f"Downloading model {model_id} from {repo_id}")
             model_dir = Path(f"./models/temp/{model_id}")
             model_dir.mkdir(parents=True, exist_ok=True)
-
+
             snapshot_download(
                 repo_id=repo_id,
                 revision=revision,
                 local_dir=model_dir,
                 local_dir_use_symlinks=False
             )
-
+
             # Save model and metadata
             metadata = {
                 "repo_id": repo_id,
                 "revision": revision,
                 "downloaded_at": str(Path(model_dir).stat().st_mtime)
             }
-
+
             # Register model
             self.registry.register_model(
                 model_id=model_id,
@@ -134,12 +157,12 @@ class ModelManager:
                 capabilities=capabilities,
                 metadata=metadata
             )
-
+
             # Save model files
             await self.storage.save_model(model_id, str(model_dir), metadata)
-
+
             return await self.storage.load_model(model_id)
-
+
         except HfHubHTTPError as e:
             logger.error(f"Failed to download model {model_id}: {e}")
             return None
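
The download step in these two hunks wraps `huggingface_hub`. As a rough standalone sketch of that step (the repo id, revision, and target directory here are placeholders; `local_dir_use_symlinks` is accepted by `snapshot_download`, though newer huggingface_hub releases deprecate it):

```python
# Standalone sketch of the snapshot download step (placeholder values).
from pathlib import Path

from huggingface_hub import snapshot_download
from huggingface_hub.utils import HfHubHTTPError

model_id = "demo-model"  # placeholder identifier
model_dir = Path(f"./models/temp/{model_id}")
model_dir.mkdir(parents=True, exist_ok=True)

try:
    snapshot_download(
        repo_id="gpt2",                # placeholder repo
        revision="main",
        local_dir=model_dir,
        local_dir_use_symlinks=False,  # materialize real files, not symlinks
    )
except HfHubHTTPError as e:
    print(f"Failed to download model {model_id}: {e}")
```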