isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199):
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,430 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Deployment Billing Tracker - Specialized billing for deployment and training operations
6
+
7
+ Extends the core ModelBillingTracker with deployment-specific metrics:
8
+ - GPU runtime hours
9
+ - Instance type costs
10
+ - Training epochs/steps billing
11
+ - Deployment lifecycle costs
12
+ """
13
+
14
+ from typing import Dict, List, Optional, Any, Union
15
+ from datetime import datetime, timezone, timedelta
16
+ from dataclasses import dataclass, asdict
17
+ import json
18
+ import logging
19
+ from enum import Enum
20
+ from .model_billing_tracker import ModelBillingTracker, ModelUsageRecord, ModelOperationType
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
class DeploymentProvider(Enum):
    """Providers a deployment can be billed against.

    Each member's value is the lowercase string used as the provider key
    in the deployment pricing table.
    """

    MODAL = "modal"
    TRITON_LOCAL = "triton_local"
    TRITON_CLOUD = "triton_cloud"
    RUNPOD = "runpod"
    LAMBDA_LABS = "lambda_labs"
    COREWEAVE = "coreweave"
32
+
33
class GPUType(Enum):
    """GPU models recognised for cost calculation.

    Each member's value is the lowercase string used as the GPU key in the
    deployment pricing table.
    """

    RTX_4090 = "rtx_4090"
    RTX_A6000 = "rtx_a6000"
    A100_40GB = "a100_40gb"
    A100_80GB = "a100_80gb"
    H100 = "h100"
    T4 = "t4"
    V100 = "v100"
42
+
43
@dataclass
class DeploymentUsageRecord(ModelUsageRecord):
    """Usage record carrying deployment/training metrics on top of ModelUsageRecord.

    Every field added here is optional and defaults to None.
    """

    # --- GPU / infrastructure ---
    gpu_type: Optional[str] = None
    gpu_count: Optional[int] = None
    runtime_hours: Optional[float] = None
    cpu_cores: Optional[int] = None
    memory_gb: Optional[int] = None

    # --- Training ---
    training_epochs: Optional[int] = None
    training_steps: Optional[int] = None
    dataset_size: Optional[int] = None

    # --- Deployment ---
    deployment_duration_hours: Optional[float] = None
    requests_served: Optional[int] = None
    avg_latency_ms: Optional[float] = None

    # --- Cost breakdown (USD) ---
    compute_cost_usd: Optional[float] = None
    storage_cost_usd: Optional[float] = None
    network_cost_usd: Optional[float] = None
67
+
68
class DeploymentBillingTracker(ModelBillingTracker):
    """
    Specialized billing tracker for deployment and training operations.

    Extends ModelBillingTracker with a per-provider GPU pricing table,
    deployment cost calculation/estimation, usage recording and filtered
    cost summaries.
    """

    def __init__(self, model_registry=None, storage_path: Optional[str] = None):
        """Initialise the base tracker, then load the deployment pricing table."""
        super().__init__(model_registry, storage_path)

        # Load pricing data for deployment providers
        self.pricing_data = self._load_deployment_pricing()

    def _load_deployment_pricing(self) -> Dict[str, Dict[str, Any]]:
        """Return the built-in pricing table keyed by provider.

        Cloud providers map GPU type -> $/hour. "triton_local" instead holds
        an electricity price ($/kWh) and a nested GPU type -> TDP (watts)
        table, which is why the inner values are typed as Any.
        """
        return {
            "modal": {
                "t4": 0.50,  # $/hour
                "rtx_4090": 0.80,
                "a100_40gb": 2.50,
                "a100_80gb": 4.00,
                "h100": 8.00,
                "base_compute": 0.10  # $/hour base compute
            },
            "triton_local": {
                "electricity": 0.12,  # $/kWh
                "gpu_tdp": {
                    "rtx_4090": 450,  # Watts
                    "a100_40gb": 400,
                    "a100_80gb": 400,
                    "h100": 700
                }
            },
            "runpod": {
                "rtx_4090": 0.44,
                "rtx_a6000": 0.79,
                "a100_40gb": 1.69,
                "a100_80gb": 2.89,
                "h100": 4.89
            },
            "lambda_labs": {
                "rtx_4090": 0.50,
                "a100_40gb": 1.50,
                "a100_80gb": 2.50,
                "h100": 4.50
            },
            "coreweave": {
                "rtx_4090": 0.57,
                "a100_40gb": 2.06,
                "a100_80gb": 2.23,
                "h100": 4.76
            }
        }

    def track_deployment_usage(
        self,
        model_id: str,
        provider: Union[str, DeploymentProvider],
        operation_type: Union[str, ModelOperationType],
        service_type: str,
        operation: str,

        # GPU/Infrastructure metrics
        gpu_type: Optional[Union[str, GPUType]] = None,
        gpu_count: Optional[int] = None,
        runtime_hours: Optional[float] = None,
        cpu_cores: Optional[int] = None,
        memory_gb: Optional[int] = None,

        # Training-specific
        training_epochs: Optional[int] = None,
        training_steps: Optional[int] = None,
        dataset_size: Optional[int] = None,

        # Deployment-specific
        deployment_duration_hours: Optional[float] = None,
        requests_served: Optional[int] = None,
        avg_latency_ms: Optional[float] = None,

        # Standard billing
        input_tokens: Optional[int] = None,
        output_tokens: Optional[int] = None,
        cost_usd: Optional[float] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> DeploymentUsageRecord:
        """
        Track deployment/training usage with specialized metrics.

        Args:
            model_id: Model identifier
            provider: Deployment provider (enum member or its string value)
            operation_type: Type of operation (enum member or its string value)
            service_type: Service type (llm, vision, etc.)
            operation: Specific operation
            gpu_type: Type of GPU used (enum member or its string value)
            gpu_count: Number of GPUs
            runtime_hours: Hours of runtime
            cpu_cores: CPU cores, stored on the record as given
            memory_gb: Memory in GB, stored on the record as given
            training_epochs: Number of training epochs
            training_steps: Number of training steps
            dataset_size: Dataset size, stored on the record as given
            deployment_duration_hours: Hours deployment was active
            requests_served: Requests served, stored on the record as given
            avg_latency_ms: Average latency, stored on the record as given
            input_tokens: Input token count
            output_tokens: Output token count
            cost_usd: Explicit total cost; when None the cost is calculated
                from the pricing table
            metadata: Extra metadata; an empty dict is stored when omitted

        Returns:
            The DeploymentUsageRecord that was appended and saved.
        """
        # Convert enums to strings
        if isinstance(provider, DeploymentProvider):
            provider = provider.value
        if isinstance(operation_type, ModelOperationType):
            operation_type = operation_type.value
        if isinstance(gpu_type, GPUType):
            gpu_type = gpu_type.value

        # Calculate deployment-specific costs
        if cost_usd is None:
            cost_breakdown = self._calculate_deployment_cost(
                provider, gpu_type, gpu_count, runtime_hours,
                deployment_duration_hours, training_epochs, training_steps
            )
            cost_usd = cost_breakdown["total_cost"]
            compute_cost = cost_breakdown["compute_cost"]
            storage_cost = cost_breakdown["storage_cost"]
            network_cost = cost_breakdown["network_cost"]
        else:
            compute_cost = cost_usd  # If provided, assume it's compute cost
            storage_cost = 0.0
            network_cost = 0.0

        # total_tokens stays None unless at least one token count is non-zero
        if input_tokens or output_tokens:
            total_tokens = (input_tokens or 0) + (output_tokens or 0)
        else:
            total_tokens = None

        # Create deployment usage record
        record = DeploymentUsageRecord(
            timestamp=datetime.now(timezone.utc).isoformat(),
            model_id=model_id,
            operation_type=operation_type,
            provider=provider,
            service_type=service_type,
            operation=operation,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=total_tokens,
            cost_usd=cost_usd,
            metadata=metadata or {},

            # Deployment-specific fields
            gpu_type=gpu_type,
            gpu_count=gpu_count,
            runtime_hours=runtime_hours,
            cpu_cores=cpu_cores,
            memory_gb=memory_gb,
            training_epochs=training_epochs,
            training_steps=training_steps,
            dataset_size=dataset_size,
            deployment_duration_hours=deployment_duration_hours,
            requests_served=requests_served,
            avg_latency_ms=avg_latency_ms,
            compute_cost_usd=compute_cost,
            storage_cost_usd=storage_cost,
            network_cost_usd=network_cost
        )

        # Add to records and save
        self.usage_records.append(record)
        self._save_data()

        logger.info(f"Tracked deployment usage: {model_id} - {provider} - {gpu_type} - ${cost_usd:.4f}")
        return record

    def _calculate_deployment_cost(
        self,
        provider: str,
        gpu_type: Optional[str],
        gpu_count: Optional[int],
        runtime_hours: Optional[float],
        deployment_duration_hours: Optional[float],
        training_epochs: Optional[int],
        training_steps: Optional[int]
    ) -> Dict[str, float]:
        """Calculate deployment costs based on provider and usage.

        Billable hours are the first value that was actually supplied among
        runtime_hours and deployment_duration_hours; 1.0 is assumed only when
        both are None. An explicit 0.0 is honoured (zero usage costs nothing)
        instead of being silently replaced by one hour.

        training_steps is accepted for interface compatibility but does not
        influence the result.

        Returns:
            Dict with "total_cost", "compute_cost", "storage_cost" and
            "network_cost", each rounded to 6 decimals.
        """
        gpu_count = gpu_count or 1
        if runtime_hours is None:
            runtime_hours = (
                deployment_duration_hours
                if deployment_duration_hours is not None
                else 1.0
            )

        compute_cost = 0.0
        storage_cost = 0.0
        network_cost = 0.0

        try:
            if provider in self.pricing_data:
                pricing = self.pricing_data[provider]

                if provider == "modal":
                    # Modal pricing: per-GPU hourly rate
                    if gpu_type and gpu_type in pricing:
                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
                    else:
                        compute_cost = pricing.get("base_compute", 0.10) * runtime_hours

                elif provider == "triton_local":
                    # Local deployment: electricity costs
                    if gpu_type and gpu_type in pricing["gpu_tdp"]:
                        power_watts = pricing["gpu_tdp"][gpu_type] * gpu_count
                        kwh_used = (power_watts / 1000) * runtime_hours
                        compute_cost = kwh_used * pricing["electricity"]

                elif provider in ["runpod", "lambda_labs", "coreweave"]:
                    # Cloud GPU providers: per-GPU hourly rates
                    if gpu_type and gpu_type in pricing:
                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours

            # NOTE(review): the source this was recovered from had lost its
            # indentation; storage/network charges are assumed to apply
            # regardless of whether the provider is in the pricing table -
            # confirm against the upstream file.

            # Add storage costs (simplified)
            storage_cost = runtime_hours * 0.01  # $0.01/hour for storage

            # Add network costs for training (data transfer)
            if training_epochs and training_epochs > 0:
                network_cost = training_epochs * 0.05  # $0.05 per epoch for data

        except Exception as e:
            # Best effort: a malformed input must not break usage tracking
            logger.error(f"Error calculating deployment cost: {e}")
            compute_cost = 0.0

        total_cost = compute_cost + storage_cost + network_cost

        return {
            "total_cost": round(total_cost, 6),
            "compute_cost": round(compute_cost, 6),
            "storage_cost": round(storage_cost, 6),
            "network_cost": round(network_cost, 6)
        }

    def estimate_deployment_cost(
        self,
        provider: str,
        gpu_type: str,
        gpu_count: int = 1,
        estimated_hours: float = 1.0,
        operation_type: str = "deployment"
    ) -> Dict[str, float]:
        """
        Estimate deployment costs before starting deployment.

        Args:
            provider: Deployment provider
            gpu_type: GPU type to use
            gpu_count: Number of GPUs
            estimated_hours: Estimated runtime hours
            operation_type: Type of operation (currently not used in the
                calculation)

        Returns:
            Cost breakdown dictionary (same shape as the internal cost
            calculation: total/compute/storage/network).
        """
        return self._calculate_deployment_cost(
            provider, gpu_type, gpu_count, estimated_hours,
            estimated_hours, None, None
        )

    @staticmethod
    def _as_utc(moment: Optional[datetime]) -> Optional[datetime]:
        """Return moment unchanged if tz-aware (or None); tag naive values as UTC."""
        if moment is not None and moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    @staticmethod
    def _group_usage(records, key_of) -> Dict[Any, Dict[str, Any]]:
        """Aggregate cost, GPU-hours and record count per key_of(record)."""
        grouped: Dict[Any, Dict[str, Any]] = {}
        for record in records:
            bucket = grouped.setdefault(
                key_of(record), {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
            )
            bucket["cost"] += record.cost_usd or 0
            bucket["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
            bucket["count"] += 1
        return grouped

    def get_deployment_summary(
        self,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        provider: Optional[str] = None,
        gpu_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """Get deployment cost summary with filters.

        Only DeploymentUsageRecord entries in self.usage_records are
        considered. Naive start_date/end_date values (and naive record
        timestamps) are interpreted as UTC so they can be compared with the
        timezone-aware timestamps written by track_deployment_usage.

        Returns:
            Dict with totals, averages, per-provider / per-GPU / per-operation
            groupings and the "period" spanned by the first and last matching
            record. The same keys are present when nothing matches.
        """
        window_start = self._as_utc(start_date)
        window_end = self._as_utc(end_date)
        check_window = window_start is not None or window_end is not None

        # Filter records
        filtered_records = []
        for record in self.usage_records:
            # Check if it's a deployment record
            if not isinstance(record, DeploymentUsageRecord):
                continue

            # Apply filters; the timestamp is parsed once, and only when a
            # date bound was actually requested
            if check_window:
                recorded_at = self._as_utc(
                    datetime.fromisoformat(record.timestamp.replace('Z', '+00:00'))
                )
                if window_start is not None and recorded_at < window_start:
                    continue
                if window_end is not None and recorded_at > window_end:
                    continue
            if provider and record.provider != provider:
                continue
            if gpu_type and record.gpu_type != gpu_type:
                continue

            filtered_records.append(record)

        if not filtered_records:
            return {
                "total_cost": 0.0,
                "total_gpu_hours": 0.0,
                "deployments": 0,
                "avg_cost_per_deployment": 0,
                "avg_cost_per_gpu_hour": 0,
                "by_provider": {},
                "by_gpu_type": {},
                "by_operation": {},
                "period": {"start": None, "end": None}
            }

        # Calculate summary
        total_cost = sum(record.cost_usd or 0 for record in filtered_records)
        total_gpu_hours = sum(
            (record.runtime_hours or 0) * (record.gpu_count or 1)
            for record in filtered_records
        )
        total_deployments = len(filtered_records)

        by_provider = self._group_usage(filtered_records, lambda r: r.provider)
        by_gpu_type = self._group_usage(filtered_records, lambda r: r.gpu_type or "unknown")
        by_operation = self._group_usage(filtered_records, lambda r: r.operation_type)

        return {
            "total_cost": round(total_cost, 6),
            "total_gpu_hours": round(total_gpu_hours, 2),
            "deployments": total_deployments,
            "avg_cost_per_deployment": round(total_cost / total_deployments, 6) if total_deployments > 0 else 0,
            "avg_cost_per_gpu_hour": round(total_cost / total_gpu_hours, 6) if total_gpu_hours > 0 else 0,
            "by_provider": by_provider,
            "by_gpu_type": by_gpu_type,
            "by_operation": by_operation,
            "period": {
                # Records are appended in chronological order, so first/last
                # bound the period
                "start": filtered_records[0].timestamp,
                "end": filtered_records[-1].timestamp
            }
        }
408
+
409
# Global deployment billing tracker instance (created lazily on first use)
_global_deployment_tracker: Optional[DeploymentBillingTracker] = None


def get_deployment_billing_tracker() -> DeploymentBillingTracker:
    """Get the global deployment billing tracker instance.

    On first call a tracker backed by a ModelRegistry is created. If that
    fails for any reason, the failure is logged and a tracker without a
    registry is used instead, so billing keeps working in degraded mode.
    """
    global _global_deployment_tracker
    if _global_deployment_tracker is None:
        try:
            from .model_repo import ModelRegistry
            registry = ModelRegistry()
            _global_deployment_tracker = DeploymentBillingTracker(model_registry=registry)
        except Exception as e:
            # Deliberate best-effort fallback - but say why, instead of
            # hiding the cause of the missing registry
            logger.warning(
                f"Falling back to deployment billing tracker without model registry: {e}"
            )
            _global_deployment_tracker = DeploymentBillingTracker()
    return _global_deployment_tracker
423
+
424
def track_deployment_usage(**kwargs) -> DeploymentUsageRecord:
    """Record deployment usage on the global tracker, forwarding all keyword arguments."""
    tracker = get_deployment_billing_tracker()
    return tracker.track_deployment_usage(**kwargs)
427
+
428
def estimate_deployment_cost(**kwargs) -> Dict[str, float]:
    """Estimate a deployment's cost via the global tracker, forwarding all keyword arguments."""
    tracker = get_deployment_billing_tracker()
    return tracker.estimate_deployment_cost(**kwargs)
@@ -37,20 +37,43 @@ class ModelManager:
37
37
  self.config_manager = config_manager or ConfigManager()
38
38
 
39
39
  def get_model_pricing(self, provider: str, model_name: str) -> Dict[str, float]:
40
- """获取模型定价信息"""
40
+ """获取模型定价信息(从数据库)"""
41
41
  try:
42
- models = self.config_manager.get_models_by_provider(provider)
43
- for model in models:
44
- if model.get("model_id") == model_name:
45
- metadata = model.get("metadata", {})
46
- if "cost_per_1000_tokens" in metadata:
47
- return {"input": metadata["cost_per_1000_tokens"], "output": metadata["cost_per_1000_tokens"]}
48
- elif "cost_per_minute" in metadata:
49
- return {"input": metadata["cost_per_minute"], "output": 0.0}
50
- elif "cost_per_1000_chars" in metadata:
51
- return {"input": metadata["cost_per_1000_chars"], "output": 0.0}
42
+ if not self.registry or not hasattr(self.registry, 'supabase_client'):
43
+ logger.warning("No database connection for pricing lookup")
44
+ return {"input": 0.0, "output": 0.0}
45
+
46
+ # 查询统一定价表
47
+ result = self.registry.supabase_client.table('current_model_pricing').select('*').eq(
48
+ 'model_id', model_name
49
+ ).eq('provider', provider).execute()
50
+
51
+ if result.data and len(result.data) > 0:
52
+ pricing = result.data[0]
53
+
54
+ # 根据定价模型转换为统一格式
55
+ pricing_model = pricing.get('pricing_model')
56
+ unit_size = pricing.get('unit_size', 1)
57
+
58
+ if pricing_model == 'per_token':
59
+ # 转换为每个 token 的成本
60
+ input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
61
+ output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
62
+ elif pricing_model in ['per_character', 'per_minute', 'per_request']:
63
+ # 这些按原始单位计费
64
+ input_cost = float(pricing.get('input_cost_per_unit', 0))
65
+ output_cost = float(pricing.get('output_cost_per_unit', 0))
66
+ # 如果有基础请求费用,加到 input 成本中
67
+ if pricing.get('base_cost_per_request', 0) > 0:
68
+ input_cost += float(pricing.get('base_cost_per_request', 0))
69
+ else:
70
+ input_cost = output_cost = 0.0
71
+
72
+ return {"input": input_cost, "output": output_cost}
73
+
52
74
  except Exception as e:
53
75
  logger.warning(f"Failed to get pricing for {provider}/{model_name}: {e}")
76
+
54
77
  return {"input": 0.0, "output": 0.0}
55
78
 
56
79
  def calculate_cost(self, provider: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
@@ -112,21 +135,21 @@ class ModelManager:
112
135
  logger.info(f"Downloading model {model_id} from {repo_id}")
113
136
  model_dir = Path(f"./models/temp/{model_id}")
114
137
  model_dir.mkdir(parents=True, exist_ok=True)
115
-
138
+
116
139
  snapshot_download(
117
140
  repo_id=repo_id,
118
141
  revision=revision,
119
142
  local_dir=model_dir,
120
143
  local_dir_use_symlinks=False
121
144
  )
122
-
145
+
123
146
  # Save model and metadata
124
147
  metadata = {
125
148
  "repo_id": repo_id,
126
149
  "revision": revision,
127
150
  "downloaded_at": str(Path(model_dir).stat().st_mtime)
128
151
  }
129
-
152
+
130
153
  # Register model
131
154
  self.registry.register_model(
132
155
  model_id=model_id,
@@ -134,12 +157,12 @@ class ModelManager:
134
157
  capabilities=capabilities,
135
158
  metadata=metadata
136
159
  )
137
-
160
+
138
161
  # Save model files
139
162
  await self.storage.save_model(model_id, str(model_dir), metadata)
140
-
163
+
141
164
  return await self.storage.load_model(model_id)
142
-
165
+
143
166
  except HfHubHTTPError as e:
144
167
  logger.error(f"Failed to download model {model_id}: {e}")
145
168
  return None