isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,430 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Deployment Billing Tracker - Specialized billing for deployment and training operations
+
+ Extends the core ModelBillingTracker with deployment-specific metrics:
+ - GPU runtime hours
+ - Instance type costs
+ - Training epochs/steps billing
+ - Deployment lifecycle costs
+ """
+
+ from typing import Dict, List, Optional, Any, Union
+ from datetime import datetime, timezone, timedelta
+ from dataclasses import dataclass, asdict
+ import json
+ import logging
+ from enum import Enum
+ from .model_billing_tracker import ModelBillingTracker, ModelUsageRecord, ModelOperationType
+
+ logger = logging.getLogger(__name__)
+
+ class DeploymentProvider(Enum):
+     """Deployment providers"""
+     MODAL = "modal"
+     TRITON_LOCAL = "triton_local"
+     TRITON_CLOUD = "triton_cloud"
+     RUNPOD = "runpod"
+     LAMBDA_LABS = "lambda_labs"
+     COREWEAVE = "coreweave"
+
+ class GPUType(Enum):
+     """GPU types for cost calculation"""
+     RTX_4090 = "rtx_4090"
+     RTX_A6000 = "rtx_a6000"
+     A100_40GB = "a100_40gb"
+     A100_80GB = "a100_80gb"
+     H100 = "h100"
+     T4 = "t4"
+     V100 = "v100"
+
+ @dataclass
+ class DeploymentUsageRecord(ModelUsageRecord):
+     """Extended usage record for deployment operations"""
+     # GPU/Infrastructure metrics
+     gpu_type: Optional[str] = None
+     gpu_count: Optional[int] = None
+     runtime_hours: Optional[float] = None
+     cpu_cores: Optional[int] = None
+     memory_gb: Optional[int] = None
+
+     # Training-specific metrics
+     training_epochs: Optional[int] = None
+     training_steps: Optional[int] = None
+     dataset_size: Optional[int] = None
+
+     # Deployment-specific metrics
+     deployment_duration_hours: Optional[float] = None
+     requests_served: Optional[int] = None
+     avg_latency_ms: Optional[float] = None
+
+     # Infrastructure costs
+     compute_cost_usd: Optional[float] = None
+     storage_cost_usd: Optional[float] = None
+     network_cost_usd: Optional[float] = None
+
+ class DeploymentBillingTracker(ModelBillingTracker):
+     """
+     Specialized billing tracker for deployment and training operations
+
+     Extends ModelBillingTracker with deployment-specific cost calculations
+     and metrics tracking for GPU-based operations.
+     """
+
+     def __init__(self, model_registry=None, storage_path: Optional[str] = None):
+         super().__init__(model_registry, storage_path)
+
+         # Load pricing data for deployment providers
+         self.pricing_data = self._load_deployment_pricing()
+
+     def _load_deployment_pricing(self) -> Dict[str, Dict[str, float]]:
+         """Load pricing data for different deployment providers and GPU types"""
+         return {
+             "modal": {
+                 "t4": 0.50,  # $/hour
+                 "rtx_4090": 0.80,
+                 "a100_40gb": 2.50,
+                 "a100_80gb": 4.00,
+                 "h100": 8.00,
+                 "base_compute": 0.10  # $/hour base compute
+             },
+             "triton_local": {
+                 "electricity": 0.12,  # $/kWh
+                 "gpu_tdp": {
+                     "rtx_4090": 450,  # Watts
+                     "a100_40gb": 400,
+                     "a100_80gb": 400,
+                     "h100": 700
+                 }
+             },
+             "runpod": {
+                 "rtx_4090": 0.44,
+                 "rtx_a6000": 0.79,
+                 "a100_40gb": 1.69,
+                 "a100_80gb": 2.89,
+                 "h100": 4.89
+             },
+             "lambda_labs": {
+                 "rtx_4090": 0.50,
+                 "a100_40gb": 1.50,
+                 "a100_80gb": 2.50,
+                 "h100": 4.50
+             },
+             "coreweave": {
+                 "rtx_4090": 0.57,
+                 "a100_40gb": 2.06,
+                 "a100_80gb": 2.23,
+                 "h100": 4.76
+             }
+         }
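Note that the triton_local entry prices local serving by electricity draw rather than an hourly GPU rate; _calculate_deployment_cost below converts TDP watts to kWh. A quick worked sketch against the shipped table (TDP is a ceiling on actual draw, so treat this as an upper bound):

# Illustrative arithmetic only, using the table's shipped rates:
# two RTX 4090s (450 W TDP each) serving for 10 hours
power_watts = 450 * 2                    # 900 W total draw
kwh_used = (power_watts / 1000) * 10.0   # 9.0 kWh
compute_cost = kwh_used * 0.12           # 9.0 kWh * $0.12/kWh = $1.08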
+
+     def track_deployment_usage(
+         self,
+         model_id: str,
+         provider: Union[str, DeploymentProvider],
+         operation_type: Union[str, ModelOperationType],
+         service_type: str,
+         operation: str,
+
+         # GPU/Infrastructure metrics
+         gpu_type: Optional[Union[str, GPUType]] = None,
+         gpu_count: Optional[int] = None,
+         runtime_hours: Optional[float] = None,
+         cpu_cores: Optional[int] = None,
+         memory_gb: Optional[int] = None,
+
+         # Training-specific
+         training_epochs: Optional[int] = None,
+         training_steps: Optional[int] = None,
+         dataset_size: Optional[int] = None,
+
+         # Deployment-specific
+         deployment_duration_hours: Optional[float] = None,
+         requests_served: Optional[int] = None,
+         avg_latency_ms: Optional[float] = None,
+
+         # Standard billing
+         input_tokens: Optional[int] = None,
+         output_tokens: Optional[int] = None,
+         cost_usd: Optional[float] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> DeploymentUsageRecord:
+         """
+         Track deployment/training usage with specialized metrics
+
+         Args:
+             model_id: Model identifier
+             provider: Deployment provider
+             operation_type: Type of operation (training, deployment, inference)
+             service_type: Service type (llm, vision, etc.)
+             operation: Specific operation
+             gpu_type: Type of GPU used
+             gpu_count: Number of GPUs
+             runtime_hours: Hours of runtime
+             training_epochs: Number of training epochs
+             deployment_duration_hours: Hours deployment was active
+             ... (other parameters as documented)
+
+         Returns:
+             DeploymentUsageRecord with calculated costs
+         """
+         # Convert enums to strings
+         if isinstance(provider, DeploymentProvider):
+             provider = provider.value
+         if isinstance(operation_type, ModelOperationType):
+             operation_type = operation_type.value
+         if isinstance(gpu_type, GPUType):
+             gpu_type = gpu_type.value
+
+         # Calculate deployment-specific costs
+         if cost_usd is None:
+             cost_breakdown = self._calculate_deployment_cost(
+                 provider, gpu_type, gpu_count, runtime_hours,
+                 deployment_duration_hours, training_epochs, training_steps
+             )
+             cost_usd = cost_breakdown["total_cost"]
+             compute_cost = cost_breakdown["compute_cost"]
+             storage_cost = cost_breakdown["storage_cost"]
+             network_cost = cost_breakdown["network_cost"]
+         else:
+             compute_cost = cost_usd  # If provided, assume it's compute cost
+             storage_cost = 0.0
+             network_cost = 0.0
+
+         # Create deployment usage record
+         record = DeploymentUsageRecord(
+             timestamp=datetime.now(timezone.utc).isoformat(),
+             model_id=model_id,
+             operation_type=operation_type,
+             provider=provider,
+             service_type=service_type,
+             operation=operation,
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             total_tokens=(input_tokens or 0) + (output_tokens or 0) if input_tokens or output_tokens else None,
+             cost_usd=cost_usd,
+             metadata=metadata or {},
+
+             # Deployment-specific fields
+             gpu_type=gpu_type,
+             gpu_count=gpu_count,
+             runtime_hours=runtime_hours,
+             cpu_cores=cpu_cores,
+             memory_gb=memory_gb,
+             training_epochs=training_epochs,
+             training_steps=training_steps,
+             dataset_size=dataset_size,
+             deployment_duration_hours=deployment_duration_hours,
+             requests_served=requests_served,
+             avg_latency_ms=avg_latency_ms,
+             compute_cost_usd=compute_cost,
+             storage_cost_usd=storage_cost,
+             network_cost_usd=network_cost
+         )
+
+         # Add to records and save
+         self.usage_records.append(record)
+         self._save_data()
+
+         logger.info(f"Tracked deployment usage: {model_id} - {provider} - {gpu_type} - ${cost_usd:.4f}")
+         return record
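A minimal usage sketch for the new tracker (model_id and operation are hypothetical values; the enums, rates, and module path are the ones defined in this release):

from isa_model.core.models.deployment_billing_tracker import (
    DeploymentBillingTracker, DeploymentProvider, GPUType,
)

tracker = DeploymentBillingTracker()  # no registry; standalone tracking
record = tracker.track_deployment_usage(
    model_id="my-org/llama-8b-ft",     # hypothetical model id
    provider=DeploymentProvider.MODAL,
    operation_type="deployment",       # plain strings are accepted alongside enums
    service_type="llm",
    operation="serve",                 # hypothetical operation name
    gpu_type=GPUType.A100_40GB,
    gpu_count=1,
    runtime_hours=2.0,
)
print(record.cost_usd)  # 2.50/h compute * 2 h + 0.01/h storage * 2 h = 5.02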
+
+     def _calculate_deployment_cost(
+         self,
+         provider: str,
+         gpu_type: Optional[str],
+         gpu_count: Optional[int],
+         runtime_hours: Optional[float],
+         deployment_duration_hours: Optional[float],
+         training_epochs: Optional[int],
+         training_steps: Optional[int]
+     ) -> Dict[str, float]:
+         """Calculate deployment costs based on provider and usage"""
+
+         gpu_count = gpu_count or 1
+         runtime_hours = runtime_hours or deployment_duration_hours or 1.0
+
+         compute_cost = 0.0
+         storage_cost = 0.0
+         network_cost = 0.0
+
+         try:
+             if provider in self.pricing_data:
+                 pricing = self.pricing_data[provider]
+
+                 if provider == "modal":
+                     # Modal pricing: per-GPU hourly rate
+                     if gpu_type and gpu_type in pricing:
+                         compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+                     else:
+                         compute_cost = pricing.get("base_compute", 0.10) * runtime_hours
+
+                 elif provider == "triton_local":
+                     # Local deployment: electricity costs
+                     if gpu_type and gpu_type in pricing["gpu_tdp"]:
+                         power_watts = pricing["gpu_tdp"][gpu_type] * gpu_count
+                         kwh_used = (power_watts / 1000) * runtime_hours
+                         compute_cost = kwh_used * pricing["electricity"]
+
+                 elif provider in ["runpod", "lambda_labs", "coreweave"]:
+                     # Cloud GPU providers: per-GPU hourly rates
+                     if gpu_type and gpu_type in pricing:
+                         compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+
+             # Add storage costs (simplified)
+             storage_cost = runtime_hours * 0.01  # $0.01/hour for storage
+
+             # Add network costs for training (data transfer)
+             if training_epochs and training_epochs > 0:
+                 network_cost = training_epochs * 0.05  # $0.05 per epoch for data
+
+         except Exception as e:
+             logger.error(f"Error calculating deployment cost: {e}")
+             compute_cost = 0.0
+
+         total_cost = compute_cost + storage_cost + network_cost
+
+         return {
+             "total_cost": round(total_cost, 6),
+             "compute_cost": round(compute_cost, 6),
+             "storage_cost": round(storage_cost, 6),
+             "network_cost": round(network_cost, 6)
+         }
+
+     def estimate_deployment_cost(
+         self,
+         provider: str,
+         gpu_type: str,
+         gpu_count: int = 1,
+         estimated_hours: float = 1.0,
+         operation_type: str = "deployment"
+     ) -> Dict[str, float]:
+         """
+         Estimate deployment costs before starting deployment
+
+         Args:
+             provider: Deployment provider
+             gpu_type: GPU type to use
+             gpu_count: Number of GPUs
+             estimated_hours: Estimated runtime hours
+             operation_type: Type of operation
+
+         Returns:
+             Cost breakdown dictionary
+         """
+         return self._calculate_deployment_cost(
+             provider, gpu_type, gpu_count, estimated_hours,
+             estimated_hours, None, None
+         )
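estimate_deployment_cost is a thin wrapper over _calculate_deployment_cost with no training epochs, so for a cloud provider the breakdown reduces to hourly-rate arithmetic plus the flat storage charge. A sketch against the shipped RunPod rates:

tracker = DeploymentBillingTracker()
quote = tracker.estimate_deployment_cost(
    provider="runpod", gpu_type="a100_80gb", gpu_count=2, estimated_hours=3.0
)
# compute: 2.89 * 2 GPUs * 3 h = 17.34
# storage: 0.01 * 3 h          =  0.03
# quote == {"total_cost": 17.37, "compute_cost": 17.34,
#           "storage_cost": 0.03, "network_cost": 0.0}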
+
+     def get_deployment_summary(
+         self,
+         start_date: Optional[datetime] = None,
+         end_date: Optional[datetime] = None,
+         provider: Optional[str] = None,
+         gpu_type: Optional[str] = None
+     ) -> Dict[str, Any]:
+         """Get deployment cost summary with filters"""
+
+         # Filter records
+         filtered_records = []
+         for record in self.usage_records:
+             # Check if it's a deployment record
+             if not isinstance(record, DeploymentUsageRecord):
+                 continue
+
+             # Apply filters
+             if start_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) < start_date:
+                 continue
+             if end_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) > end_date:
+                 continue
+             if provider and record.provider != provider:
+                 continue
+             if gpu_type and record.gpu_type != gpu_type:
+                 continue
+
+             filtered_records.append(record)
+
+         if not filtered_records:
+             return {
+                 "total_cost": 0.0,
+                 "total_gpu_hours": 0.0,
+                 "deployments": 0,
+                 "by_provider": {},
+                 "by_gpu_type": {},
+                 "by_operation": {}
+             }
+
+         # Calculate summary
+         total_cost = sum(record.cost_usd or 0 for record in filtered_records)
+         total_gpu_hours = sum((record.runtime_hours or 0) * (record.gpu_count or 1) for record in filtered_records)
+         total_deployments = len(filtered_records)
+
+         # Group by provider
+         by_provider = {}
+         for record in filtered_records:
+             if record.provider not in by_provider:
+                 by_provider[record.provider] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+             by_provider[record.provider]["cost"] += record.cost_usd or 0
+             by_provider[record.provider]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+             by_provider[record.provider]["count"] += 1
+
+         # Group by GPU type
+         by_gpu_type = {}
+         for record in filtered_records:
+             gpu = record.gpu_type or "unknown"
+             if gpu not in by_gpu_type:
+                 by_gpu_type[gpu] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+             by_gpu_type[gpu]["cost"] += record.cost_usd or 0
+             by_gpu_type[gpu]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+             by_gpu_type[gpu]["count"] += 1
+
+         # Group by operation
+         by_operation = {}
+         for record in filtered_records:
+             op = record.operation_type
+             if op not in by_operation:
+                 by_operation[op] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+             by_operation[op]["cost"] += record.cost_usd or 0
+             by_operation[op]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+             by_operation[op]["count"] += 1
+
+         return {
+             "total_cost": round(total_cost, 6),
+             "total_gpu_hours": round(total_gpu_hours, 2),
+             "deployments": total_deployments,
+             "avg_cost_per_deployment": round(total_cost / total_deployments, 6) if total_deployments > 0 else 0,
+             "avg_cost_per_gpu_hour": round(total_cost / total_gpu_hours, 6) if total_gpu_hours > 0 else 0,
+             "by_provider": by_provider,
+             "by_gpu_type": by_gpu_type,
+             "by_operation": by_operation,
+             "period": {
+                 "start": filtered_records[0].timestamp if filtered_records else None,
+                 "end": filtered_records[-1].timestamp if filtered_records else None
+             }
+         }
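A sketch of pulling a filtered summary. One caveat grounded in the code above: stored timestamps are timezone-aware (datetime.now(timezone.utc)), so start_date/end_date must also be timezone-aware or Python raises TypeError when comparing:

from datetime import datetime, timezone

tracker = DeploymentBillingTracker()
summary = tracker.get_deployment_summary(
    start_date=datetime(2025, 8, 1, tzinfo=timezone.utc),  # tz-aware to match stored timestamps
    provider="modal",
)
print(summary["total_cost"], summary["total_gpu_hours"])
print(summary["by_gpu_type"])  # e.g. {"a100_40gb": {"cost": ..., "gpu_hours": ..., "count": ...}}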
+
+ # Global deployment billing tracker instance
+ _global_deployment_tracker: Optional[DeploymentBillingTracker] = None
+
+ def get_deployment_billing_tracker() -> DeploymentBillingTracker:
+     """Get the global deployment billing tracker instance"""
+     global _global_deployment_tracker
+     if _global_deployment_tracker is None:
+         try:
+             from .model_repo import ModelRegistry
+             registry = ModelRegistry()
+             _global_deployment_tracker = DeploymentBillingTracker(model_registry=registry)
+         except Exception:
+             _global_deployment_tracker = DeploymentBillingTracker()
+     return _global_deployment_tracker
+
+ def track_deployment_usage(**kwargs) -> DeploymentUsageRecord:
+     """Convenience function to track deployment usage"""
+     return get_deployment_billing_tracker().track_deployment_usage(**kwargs)
+
+ def estimate_deployment_cost(**kwargs) -> Dict[str, float]:
+     """Convenience function to estimate deployment cost"""
+     return get_deployment_billing_tracker().estimate_deployment_cost(**kwargs)
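The module-level helpers share one lazily built tracker, so a caller can quote a job up front and record actuals afterward without wiring up a registry. A sketch with hypothetical model and operation names:

from isa_model.core.models.deployment_billing_tracker import (
    estimate_deployment_cost, track_deployment_usage,
)

quote = estimate_deployment_cost(provider="lambda_labs", gpu_type="h100",
                                 estimated_hours=0.5)
record = track_deployment_usage(
    model_id="my-org/whisper-ft",   # hypothetical
    provider="lambda_labs",
    operation_type="training",
    service_type="audio",
    operation="fine_tune",          # hypothetical
    gpu_type="h100",
    gpu_count=1,
    runtime_hours=0.5,
    training_epochs=3,
)
# 4.50 * 0.5 h compute + 0.005 storage + 3 epochs * 0.05 network = 2.405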
@@ -2,8 +2,6 @@ from typing import Dict, Optional, List, Any
  import logging
  from pathlib import Path
  from datetime import datetime
- from huggingface_hub import hf_hub_download, snapshot_download
- from huggingface_hub.errors import HfHubHTTPError
  from .model_storage import ModelStorage, LocalModelStorage
  from .model_repo import ModelRegistry, ModelType, ModelCapability
  from .model_billing_tracker import ModelBillingTracker, ModelOperationType
@@ -37,20 +35,43 @@ class ModelManager:
          self.config_manager = config_manager or ConfigManager()
 
      def get_model_pricing(self, provider: str, model_name: str) -> Dict[str, float]:
-         """Get model pricing information"""
+         """Get model pricing information (from the database)"""
          try:
-             models = self.config_manager.get_models_by_provider(provider)
-             for model in models:
-                 if model.get("model_id") == model_name:
-                     metadata = model.get("metadata", {})
-                     if "cost_per_1000_tokens" in metadata:
-                         return {"input": metadata["cost_per_1000_tokens"], "output": metadata["cost_per_1000_tokens"]}
-                     elif "cost_per_minute" in metadata:
-                         return {"input": metadata["cost_per_minute"], "output": 0.0}
-                     elif "cost_per_1000_chars" in metadata:
-                         return {"input": metadata["cost_per_1000_chars"], "output": 0.0}
+             if not self.registry or not hasattr(self.registry, 'supabase_client'):
+                 logger.warning("No database connection for pricing lookup")
+                 return {"input": 0.0, "output": 0.0}
+
+             # Query the unified pricing table
+             result = self.registry.supabase_client.table('current_model_pricing').select('*').eq(
+                 'model_id', model_name
+             ).eq('provider', provider).execute()
+
+             if result.data and len(result.data) > 0:
+                 pricing = result.data[0]
+
+                 # Convert to a unified format based on the pricing model
+                 pricing_model = pricing.get('pricing_model')
+                 unit_size = pricing.get('unit_size', 1)
+
+                 if pricing_model == 'per_token':
+                     # Convert to per-token cost
+                     input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
+                     output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
+                 elif pricing_model in ['per_character', 'per_minute', 'per_request']:
+                     # These are billed in their original units
+                     input_cost = float(pricing.get('input_cost_per_unit', 0))
+                     output_cost = float(pricing.get('output_cost_per_unit', 0))
+                     # If there is a base per-request fee, add it to the input cost
+                     if pricing.get('base_cost_per_request', 0) > 0:
+                         input_cost += float(pricing.get('base_cost_per_request', 0))
+                 else:
+                     input_cost = output_cost = 0.0
+
+                 return {"input": input_cost, "output": output_cost}
+
          except Exception as e:
              logger.warning(f"Failed to get pricing for {provider}/{model_name}: {e}")
+
          return {"input": 0.0, "output": 0.0}
 
      def calculate_cost(self, provider: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
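The rewritten lookup reads the current_model_pricing table instead of scanning provider metadata. A hypothetical per_token row (made-up values, column names as referenced above) and the arithmetic that branch performs on it:

row = {
    "pricing_model": "per_token",
    "unit_size": 1000,
    "input_cost_per_unit": 0.002,
    "output_cost_per_unit": 0.004,
    "base_cost_per_request": 0,
}

# The per_token branch scales the stored per-unit rate by unit_size:
input_cost = float(row["input_cost_per_unit"]) * row["unit_size"]    # 2.0
output_cost = float(row["output_cost_per_unit"]) * row["unit_size"]  # 4.0
print({"input": input_cost, "output": output_cost})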
@@ -79,73 +100,7 @@ class ModelManager:
              logger.warning(f"Failed to find cheapest model for {provider}: {e}")
              return None
 
-     async def get_model(self,
-                         model_id: str,
-                         repo_id: str,
-                         model_type: ModelType,
-                         capabilities: List[ModelCapability],
-                         revision: Optional[str] = None,
-                         force_download: bool = False) -> Optional[Path]:
-         """
-         Get model files, downloading if necessary
-
-         Args:
-             model_id: Unique identifier for the model
-             repo_id: Hugging Face repository ID
-             model_type: Type of model (LLM, embedding, etc.)
-             capabilities: List of model capabilities
-             revision: Specific model version/tag
-             force_download: Force re-download even if cached
-
-         Returns:
-             Path to the model files or None if failed
-         """
-         # Check if model is already downloaded
-         if not force_download:
-             model_path = await self.storage.load_model(model_id)
-             if model_path:
-                 logger.info(f"Using cached model {model_id}")
-                 return model_path
-
-         try:
-             # Download model files
-             logger.info(f"Downloading model {model_id} from {repo_id}")
-             model_dir = Path(f"./models/temp/{model_id}")
-             model_dir.mkdir(parents=True, exist_ok=True)
-
-             snapshot_download(
-                 repo_id=repo_id,
-                 revision=revision,
-                 local_dir=model_dir,
-                 local_dir_use_symlinks=False
-             )
-
-             # Save model and metadata
-             metadata = {
-                 "repo_id": repo_id,
-                 "revision": revision,
-                 "downloaded_at": str(Path(model_dir).stat().st_mtime)
-             }
-
-             # Register model
-             self.registry.register_model(
-                 model_id=model_id,
-                 model_type=model_type,
-                 capabilities=capabilities,
-                 metadata=metadata
-             )
-
-             # Save model files
-             await self.storage.save_model(model_id, str(model_dir), metadata)
-
-             return await self.storage.load_model(model_id)
-
-         except HfHubHTTPError as e:
-             logger.error(f"Failed to download model {model_id}: {e}")
-             return None
-         except Exception as e:
-             logger.error(f"Unexpected error downloading model {model_id}: {e}")
-             return None
+     # Local model download functionality removed - use cloud API services only
 
      async def list_models(self) -> List[Dict[str, Any]]:
          """List all downloaded models with their metadata"""