isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/core/models/deployment_billing_tracker.py (new file)
@@ -0,0 +1,430 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Deployment Billing Tracker - Specialized billing for deployment and training operations
+
+Extends the core ModelBillingTracker with deployment-specific metrics:
+- GPU runtime hours
+- Instance type costs
+- Training epochs/steps billing
+- Deployment lifecycle costs
+"""
+
+from typing import Dict, List, Optional, Any, Union
+from datetime import datetime, timezone, timedelta
+from dataclasses import dataclass, asdict
+import json
+import logging
+from enum import Enum
+from .model_billing_tracker import ModelBillingTracker, ModelUsageRecord, ModelOperationType
+
+logger = logging.getLogger(__name__)
+
+class DeploymentProvider(Enum):
+    """Deployment providers"""
+    MODAL = "modal"
+    TRITON_LOCAL = "triton_local"
+    TRITON_CLOUD = "triton_cloud"
+    RUNPOD = "runpod"
+    LAMBDA_LABS = "lambda_labs"
+    COREWEAVE = "coreweave"
+
+class GPUType(Enum):
+    """GPU types for cost calculation"""
+    RTX_4090 = "rtx_4090"
+    RTX_A6000 = "rtx_a6000"
+    A100_40GB = "a100_40gb"
+    A100_80GB = "a100_80gb"
+    H100 = "h100"
+    T4 = "t4"
+    V100 = "v100"
+
+@dataclass
+class DeploymentUsageRecord(ModelUsageRecord):
+    """Extended usage record for deployment operations"""
+    # GPU/Infrastructure metrics
+    gpu_type: Optional[str] = None
+    gpu_count: Optional[int] = None
+    runtime_hours: Optional[float] = None
+    cpu_cores: Optional[int] = None
+    memory_gb: Optional[int] = None
+
+    # Training-specific metrics
+    training_epochs: Optional[int] = None
+    training_steps: Optional[int] = None
+    dataset_size: Optional[int] = None
+
+    # Deployment-specific metrics
+    deployment_duration_hours: Optional[float] = None
+    requests_served: Optional[int] = None
+    avg_latency_ms: Optional[float] = None
+
+    # Infrastructure costs
+    compute_cost_usd: Optional[float] = None
+    storage_cost_usd: Optional[float] = None
+    network_cost_usd: Optional[float] = None
+
+class DeploymentBillingTracker(ModelBillingTracker):
+    """
+    Specialized billing tracker for deployment and training operations
+
+    Extends ModelBillingTracker with deployment-specific cost calculations
+    and metrics tracking for GPU-based operations.
+    """
+
+    def __init__(self, model_registry=None, storage_path: Optional[str] = None):
+        super().__init__(model_registry, storage_path)
+
+        # Load pricing data for deployment providers
+        self.pricing_data = self._load_deployment_pricing()
+
+    def _load_deployment_pricing(self) -> Dict[str, Dict[str, float]]:
+        """Load pricing data for different deployment providers and GPU types"""
+        return {
+            "modal": {
+                "t4": 0.50,  # $/hour
+                "rtx_4090": 0.80,
+                "a100_40gb": 2.50,
+                "a100_80gb": 4.00,
+                "h100": 8.00,
+                "base_compute": 0.10  # $/hour base compute
+            },
+            "triton_local": {
+                "electricity": 0.12,  # $/kWh
+                "gpu_tdp": {
+                    "rtx_4090": 450,  # Watts
+                    "a100_40gb": 400,
+                    "a100_80gb": 400,
+                    "h100": 700
+                }
+            },
+            "runpod": {
+                "rtx_4090": 0.44,
+                "rtx_a6000": 0.79,
+                "a100_40gb": 1.69,
+                "a100_80gb": 2.89,
+                "h100": 4.89
+            },
+            "lambda_labs": {
+                "rtx_4090": 0.50,
+                "a100_40gb": 1.50,
+                "a100_80gb": 2.50,
+                "h100": 4.50
+            },
+            "coreweave": {
+                "rtx_4090": 0.57,
+                "a100_40gb": 2.06,
+                "a100_80gb": 2.23,
+                "h100": 4.76
+            }
+        }
+
+    def track_deployment_usage(
+        self,
+        model_id: str,
+        provider: Union[str, DeploymentProvider],
+        operation_type: Union[str, ModelOperationType],
+        service_type: str,
+        operation: str,
+
+        # GPU/Infrastructure metrics
+        gpu_type: Optional[Union[str, GPUType]] = None,
+        gpu_count: Optional[int] = None,
+        runtime_hours: Optional[float] = None,
+        cpu_cores: Optional[int] = None,
+        memory_gb: Optional[int] = None,
+
+        # Training-specific
+        training_epochs: Optional[int] = None,
+        training_steps: Optional[int] = None,
+        dataset_size: Optional[int] = None,
+
+        # Deployment-specific
+        deployment_duration_hours: Optional[float] = None,
+        requests_served: Optional[int] = None,
+        avg_latency_ms: Optional[float] = None,
+
+        # Standard billing
+        input_tokens: Optional[int] = None,
+        output_tokens: Optional[int] = None,
+        cost_usd: Optional[float] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> DeploymentUsageRecord:
+        """
+        Track deployment/training usage with specialized metrics
+
+        Args:
+            model_id: Model identifier
+            provider: Deployment provider
+            operation_type: Type of operation (training, deployment, inference)
+            service_type: Service type (llm, vision, etc.)
+            operation: Specific operation
+            gpu_type: Type of GPU used
+            gpu_count: Number of GPUs
+            runtime_hours: Hours of runtime
+            training_epochs: Number of training epochs
+            deployment_duration_hours: Hours deployment was active
+            ... (other parameters as documented)
+
+        Returns:
+            DeploymentUsageRecord with calculated costs
+        """
+        # Convert enums to strings
+        if isinstance(provider, DeploymentProvider):
+            provider = provider.value
+        if isinstance(operation_type, ModelOperationType):
+            operation_type = operation_type.value
+        if isinstance(gpu_type, GPUType):
+            gpu_type = gpu_type.value
+
+        # Calculate deployment-specific costs
+        if cost_usd is None:
+            cost_breakdown = self._calculate_deployment_cost(
+                provider, gpu_type, gpu_count, runtime_hours,
+                deployment_duration_hours, training_epochs, training_steps
+            )
+            cost_usd = cost_breakdown["total_cost"]
+            compute_cost = cost_breakdown["compute_cost"]
+            storage_cost = cost_breakdown["storage_cost"]
+            network_cost = cost_breakdown["network_cost"]
+        else:
+            compute_cost = cost_usd  # If provided, assume it's compute cost
+            storage_cost = 0.0
+            network_cost = 0.0
+
+        # Create deployment usage record
+        record = DeploymentUsageRecord(
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            model_id=model_id,
+            operation_type=operation_type,
+            provider=provider,
+            service_type=service_type,
+            operation=operation,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=(input_tokens or 0) + (output_tokens or 0) if input_tokens or output_tokens else None,
+            cost_usd=cost_usd,
+            metadata=metadata or {},
+
+            # Deployment-specific fields
+            gpu_type=gpu_type,
+            gpu_count=gpu_count,
+            runtime_hours=runtime_hours,
+            cpu_cores=cpu_cores,
+            memory_gb=memory_gb,
+            training_epochs=training_epochs,
+            training_steps=training_steps,
+            dataset_size=dataset_size,
+            deployment_duration_hours=deployment_duration_hours,
+            requests_served=requests_served,
+            avg_latency_ms=avg_latency_ms,
+            compute_cost_usd=compute_cost,
+            storage_cost_usd=storage_cost,
+            network_cost_usd=network_cost
+        )
+
+        # Add to records and save
+        self.usage_records.append(record)
+        self._save_data()
+
+        logger.info(f"Tracked deployment usage: {model_id} - {provider} - {gpu_type} - ${cost_usd:.4f}")
+        return record
+
+    def _calculate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: Optional[str],
+        gpu_count: Optional[int],
+        runtime_hours: Optional[float],
+        deployment_duration_hours: Optional[float],
+        training_epochs: Optional[int],
+        training_steps: Optional[int]
+    ) -> Dict[str, float]:
+        """Calculate deployment costs based on provider and usage"""
+
+        gpu_count = gpu_count or 1
+        runtime_hours = runtime_hours or deployment_duration_hours or 1.0
+
+        compute_cost = 0.0
+        storage_cost = 0.0
+        network_cost = 0.0
+
+        try:
+            if provider in self.pricing_data:
+                pricing = self.pricing_data[provider]
+
+                if provider == "modal":
+                    # Modal pricing: per-GPU hourly rate
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+                    else:
+                        compute_cost = pricing.get("base_compute", 0.10) * runtime_hours
+
+                elif provider == "triton_local":
+                    # Local deployment: electricity costs
+                    if gpu_type and gpu_type in pricing["gpu_tdp"]:
+                        power_watts = pricing["gpu_tdp"][gpu_type] * gpu_count
+                        kwh_used = (power_watts / 1000) * runtime_hours
+                        compute_cost = kwh_used * pricing["electricity"]
+
+                elif provider in ["runpod", "lambda_labs", "coreweave"]:
+                    # Cloud GPU providers: per-GPU hourly rates
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+
+            # Add storage costs (simplified)
+            storage_cost = runtime_hours * 0.01  # $0.01/hour for storage
+
+            # Add network costs for training (data transfer)
+            if training_epochs and training_epochs > 0:
+                network_cost = training_epochs * 0.05  # $0.05 per epoch for data
+
+        except Exception as e:
+            logger.error(f"Error calculating deployment cost: {e}")
+            compute_cost = 0.0
+
+        total_cost = compute_cost + storage_cost + network_cost
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "compute_cost": round(compute_cost, 6),
+            "storage_cost": round(storage_cost, 6),
+            "network_cost": round(network_cost, 6)
+        }
+
+    def estimate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: str,
+        gpu_count: int = 1,
+        estimated_hours: float = 1.0,
+        operation_type: str = "deployment"
+    ) -> Dict[str, float]:
+        """
+        Estimate deployment costs before starting deployment
+
+        Args:
+            provider: Deployment provider
+            gpu_type: GPU type to use
+            gpu_count: Number of GPUs
+            estimated_hours: Estimated runtime hours
+            operation_type: Type of operation
+
+        Returns:
+            Cost breakdown dictionary
+        """
+        return self._calculate_deployment_cost(
+            provider, gpu_type, gpu_count, estimated_hours,
+            estimated_hours, None, None
+        )
+
+    def get_deployment_summary(
+        self,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        provider: Optional[str] = None,
+        gpu_type: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Get deployment cost summary with filters"""
+
+        # Filter records
+        filtered_records = []
+        for record in self.usage_records:
+            # Check if it's a deployment record
+            if not isinstance(record, DeploymentUsageRecord):
+                continue
+
+            # Apply filters
+            if start_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) < start_date:
+                continue
+            if end_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) > end_date:
+                continue
+            if provider and record.provider != provider:
+                continue
+            if gpu_type and record.gpu_type != gpu_type:
+                continue
+
+            filtered_records.append(record)
+
+        if not filtered_records:
+            return {
+                "total_cost": 0.0,
+                "total_gpu_hours": 0.0,
+                "deployments": 0,
+                "by_provider": {},
+                "by_gpu_type": {},
+                "by_operation": {}
+            }
+
+        # Calculate summary
+        total_cost = sum(record.cost_usd or 0 for record in filtered_records)
+        total_gpu_hours = sum((record.runtime_hours or 0) * (record.gpu_count or 1) for record in filtered_records)
+        total_deployments = len(filtered_records)
+
+        # Group by provider
+        by_provider = {}
+        for record in filtered_records:
+            if record.provider not in by_provider:
+                by_provider[record.provider] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_provider[record.provider]["cost"] += record.cost_usd or 0
+            by_provider[record.provider]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_provider[record.provider]["count"] += 1
+
+        # Group by GPU type
+        by_gpu_type = {}
+        for record in filtered_records:
+            gpu = record.gpu_type or "unknown"
+            if gpu not in by_gpu_type:
+                by_gpu_type[gpu] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_gpu_type[gpu]["cost"] += record.cost_usd or 0
+            by_gpu_type[gpu]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_gpu_type[gpu]["count"] += 1
+
+        # Group by operation
+        by_operation = {}
+        for record in filtered_records:
+            op = record.operation_type
+            if op not in by_operation:
+                by_operation[op] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_operation[op]["cost"] += record.cost_usd or 0
+            by_operation[op]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_operation[op]["count"] += 1
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "total_gpu_hours": round(total_gpu_hours, 2),
+            "deployments": total_deployments,
+            "avg_cost_per_deployment": round(total_cost / total_deployments, 6) if total_deployments > 0 else 0,
+            "avg_cost_per_gpu_hour": round(total_cost / total_gpu_hours, 6) if total_gpu_hours > 0 else 0,
+            "by_provider": by_provider,
+            "by_gpu_type": by_gpu_type,
+            "by_operation": by_operation,
+            "period": {
+                "start": filtered_records[0].timestamp if filtered_records else None,
+                "end": filtered_records[-1].timestamp if filtered_records else None
+            }
+        }
+
+# Global deployment billing tracker instance
+_global_deployment_tracker: Optional[DeploymentBillingTracker] = None
+
+def get_deployment_billing_tracker() -> DeploymentBillingTracker:
+    """Get the global deployment billing tracker instance"""
+    global _global_deployment_tracker
+    if _global_deployment_tracker is None:
+        try:
+            from .model_repo import ModelRegistry
+            registry = ModelRegistry()
+            _global_deployment_tracker = DeploymentBillingTracker(model_registry=registry)
+        except Exception:
+            _global_deployment_tracker = DeploymentBillingTracker()
+    return _global_deployment_tracker
+
+def track_deployment_usage(**kwargs) -> DeploymentUsageRecord:
+    """Convenience function to track deployment usage"""
+    return get_deployment_billing_tracker().track_deployment_usage(**kwargs)
+
+def estimate_deployment_cost(**kwargs) -> Dict[str, float]:
+    """Convenience function to estimate deployment cost"""
+    return get_deployment_billing_tracker().estimate_deployment_cost(**kwargs)
isa_model/core/models/model_manager.py
@@ -2,8 +2,6 @@ from typing import Dict, Optional, List, Any
 import logging
 from pathlib import Path
 from datetime import datetime
-from huggingface_hub import hf_hub_download, snapshot_download
-from huggingface_hub.errors import HfHubHTTPError
 from .model_storage import ModelStorage, LocalModelStorage
 from .model_repo import ModelRegistry, ModelType, ModelCapability
 from .model_billing_tracker import ModelBillingTracker, ModelOperationType
@@ -37,20 +35,43 @@ class ModelManager:
         self.config_manager = config_manager or ConfigManager()
 
     def get_model_pricing(self, provider: str, model_name: str) -> Dict[str, float]:
-        """
+        """Get model pricing information (from the database)"""
         try:
-
-
-
-
-
-
-
-
-
-
+            if not self.registry or not hasattr(self.registry, 'supabase_client'):
+                logger.warning("No database connection for pricing lookup")
+                return {"input": 0.0, "output": 0.0}
+
+            # Query the unified pricing table
+            result = self.registry.supabase_client.table('current_model_pricing').select('*').eq(
+                'model_id', model_name
+            ).eq('provider', provider).execute()
+
+            if result.data and len(result.data) > 0:
+                pricing = result.data[0]
+
+                # Convert to a unified format based on the pricing model
+                pricing_model = pricing.get('pricing_model')
+                unit_size = pricing.get('unit_size', 1)
+
+                if pricing_model == 'per_token':
+                    # Convert to per-token cost
+                    input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
+                    output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
+                elif pricing_model in ['per_character', 'per_minute', 'per_request']:
+                    # These are billed in their original units
+                    input_cost = float(pricing.get('input_cost_per_unit', 0))
+                    output_cost = float(pricing.get('output_cost_per_unit', 0))
+                    # If there is a base per-request fee, add it to the input cost
+                    if pricing.get('base_cost_per_request', 0) > 0:
+                        input_cost += float(pricing.get('base_cost_per_request', 0))
+                else:
+                    input_cost = output_cost = 0.0
+
+                return {"input": input_cost, "output": output_cost}
+
         except Exception as e:
             logger.warning(f"Failed to get pricing for {provider}/{model_name}: {e}")
+
         return {"input": 0.0, "output": 0.0}
 
     def calculate_cost(self, provider: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
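To make the unit conversion concrete, here is a worked example against an assumed `current_model_pricing` row. The column names are taken from the lookup above; the values (a per-token price corresponding to $0.15/$0.60 per million tokens) are illustrative only.

```python
# Illustrative row; columns match what get_model_pricing reads above,
# values are assumptions for the sake of the arithmetic.
row = {
    "pricing_model": "per_token",
    "unit_size": 1,
    "input_cost_per_unit": 0.00000015,   # $ per token ($0.15 / 1M tokens)
    "output_cost_per_unit": 0.00000060,  # $ per token ($0.60 / 1M tokens)
    "base_cost_per_request": 0,
}

# per_token branch: per-unit cost scaled by unit_size.
input_cost = float(row["input_cost_per_unit"]) * row["unit_size"]
output_cost = float(row["output_cost_per_unit"]) * row["unit_size"]
print({"input": input_cost, "output": output_cost})
# -> {'input': 1.5e-07, 'output': 6e-07}
```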
@@ -79,73 +100,7 @@ class ModelManager:
             logger.warning(f"Failed to find cheapest model for {provider}: {e}")
             return None
 
-
-                        model_id: str,
-                        repo_id: str,
-                        model_type: ModelType,
-                        capabilities: List[ModelCapability],
-                        revision: Optional[str] = None,
-                        force_download: bool = False) -> Optional[Path]:
-        """
-        Get model files, downloading if necessary
-
-        Args:
-            model_id: Unique identifier for the model
-            repo_id: Hugging Face repository ID
-            model_type: Type of model (LLM, embedding, etc.)
-            capabilities: List of model capabilities
-            revision: Specific model version/tag
-            force_download: Force re-download even if cached
-
-        Returns:
-            Path to the model files or None if failed
-        """
-        # Check if model is already downloaded
-        if not force_download:
-            model_path = await self.storage.load_model(model_id)
-            if model_path:
-                logger.info(f"Using cached model {model_id}")
-                return model_path
-
-        try:
-            # Download model files
-            logger.info(f"Downloading model {model_id} from {repo_id}")
-            model_dir = Path(f"./models/temp/{model_id}")
-            model_dir.mkdir(parents=True, exist_ok=True)
-
-            snapshot_download(
-                repo_id=repo_id,
-                revision=revision,
-                local_dir=model_dir,
-                local_dir_use_symlinks=False
-            )
-
-            # Save model and metadata
-            metadata = {
-                "repo_id": repo_id,
-                "revision": revision,
-                "downloaded_at": str(Path(model_dir).stat().st_mtime)
-            }
-
-            # Register model
-            self.registry.register_model(
-                model_id=model_id,
-                model_type=model_type,
-                capabilities=capabilities,
-                metadata=metadata
-            )
-
-            # Save model files
-            await self.storage.save_model(model_id, str(model_dir), metadata)
-
-            return await self.storage.load_model(model_id)
-
-        except HfHubHTTPError as e:
-            logger.error(f"Failed to download model {model_id}: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error downloading model {model_id}: {e}")
-            return None
+    # Local model download functionality removed - use cloud API services only
 
     async def list_models(self) -> List[Dict[str, Any]]:
         """List all downloaded models with their metadata"""