isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/training/intelligent/resource_optimizer.py
@@ -1,839 +0,0 @@
- """
- Resource Optimization System for Training
-
- This module provides intelligent resource selection and cost optimization:
- - GPU type selection based on model requirements
- - Cloud provider comparison and selection
- - Cost estimation and budget optimization
- - Performance prediction and time estimation
- - Resource availability monitoring
-
- Optimizes for cost, performance, and availability based on user constraints.
- """
-
- import logging
- from typing import Dict, List, Optional, Any, Tuple
- from dataclasses import dataclass, field
- from datetime import datetime
- import json
- import os
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class GPUSpec:
-     """GPU specification and characteristics."""
-
-     name: str
-     memory_gb: int
-     compute_capability: float
-
-     # Performance characteristics
-     fp16_tflops: float
-     fp32_tflops: float
-     memory_bandwidth_gbps: float
-
-     # Cost (per hour in USD)
-     cost_per_hour: float = 0.0
-
-     # Availability
-     availability_score: float = 1.0  # 0.0 to 1.0
-
-     # Provider information
-     providers: List[str] = field(default_factory=list)
-
-     # Training characteristics
-     training_efficiency: float = 1.0  # Relative efficiency for training
-     power_efficiency: float = 1.0  # Performance per watt
-
-     # Metadata
-     is_recommended: bool = False
-     description: str = ""
-
-
- @dataclass
- class CloudProvider:
-     """Cloud provider specification."""
-
-     name: str
-     regions: List[str]
-
-     # Available GPU types
-     available_gpus: List[str]
-
-     # Pricing model
-     pricing_model: str = "hourly"  # "hourly", "spot", "reserved"
-
-     # Features
-     supports_spot_instances: bool = False
-     supports_auto_scaling: bool = False
-     supports_preemption: bool = False
-
-     # Performance characteristics
-     startup_time_minutes: float = 5.0
-     network_performance: str = "standard"  # "low", "standard", "high"
-
-     # Reliability
-     availability_score: float = 0.99
-
-     # Additional costs
-     storage_cost_per_gb_hour: float = 0.0
-     egress_cost_per_gb: float = 0.0
-
-     description: str = ""
-
-
- @dataclass
- class ResourceRecommendation:
-     """Resource optimization recommendation."""
-
-     # Selected resources
-     gpu: str
-     cloud_provider: str
-     region: str
-     instance_type: str
-
-     # Cost estimates
-     estimated_cost: float
-     cost_breakdown: Dict[str, float]
-
-     # Performance estimates
-     estimated_time: float  # hours
-     performance_score: float
-
-     # Configuration
-     recommended_batch_size: int
-     recommended_precision: str  # "fp16", "fp32", "bf16"
-
-     # Alternatives
-     alternatives: List[Dict[str, Any]]
-
-     # Reasoning
-     decision_factors: List[str]
-     confidence: float
-
-     # Metadata
-     created_at: datetime = field(default_factory=datetime.now)
-
-
- class ResourceOptimizer:
-     """
-     Intelligent resource optimization system.
-
-     This class analyzes training requirements and recommends optimal resources:
-     - GPU selection based on model size and requirements
-     - Cloud provider comparison for cost and performance
-     - Cost estimation and budget optimization
-     - Performance prediction and time estimation
-
-     Example:
-         ```python
-         optimizer = ResourceOptimizer()
-
-         recommendation = optimizer.optimize_resources(
-             model_name="google/gemma-2-7b-it",
-             training_config=config,
-             budget_limit=100.0,
-             time_limit=8
-         )
-
-         print(f"Recommended: {recommendation.gpu} on {recommendation.cloud_provider}")
-         print(f"Cost: ${recommendation.estimated_cost:.2f}")
-         ```
-     """
-
-     def __init__(self, data_dir: Optional[str] = None):
-         """
-         Initialize resource optimizer.
-
-         Args:
-             data_dir: Directory for storing resource data
-         """
-         self.data_dir = data_dir or os.path.join(os.getcwd(), "resource_data")
-         os.makedirs(self.data_dir, exist_ok=True)
-
-         # Initialize resource databases
-         self.gpus: Dict[str, GPUSpec] = {}
-         self.cloud_providers: Dict[str, CloudProvider] = {}
-         self.pricing_cache: Dict[str, Dict[str, float]] = {}
-
-         # Load resource data
-         self._load_resource_data()
-
-         # Initialize with defaults if empty
-         if not self.gpus:
-             self._initialize_default_resources()
-
-         logger.info(f"Resource optimizer initialized with {len(self.gpus)} GPUs and {len(self.cloud_providers)} providers")
-
-     def optimize_resources(
-         self,
-         model_name: str,
-         training_config: Any,
-         budget_limit: Optional[float] = None,
-         time_limit: Optional[int] = None,
-         preferences: Optional[Dict[str, Any]] = None
-     ) -> ResourceRecommendation:
-         """
-         Optimize resource selection for training requirements.
-
-         Args:
-             model_name: Name of the model to train
-             training_config: Training configuration
-             budget_limit: Maximum budget in USD
-             time_limit: Maximum time in hours
-             preferences: User preferences for GPU/cloud providers
-
-         Returns:
-             Optimal resource recommendation
-         """
-         preferences = preferences or {}
-
-         logger.info(f"Optimizing resources for {model_name}")
-
-         try:
-             # Step 1: Analyze model requirements
-             model_requirements = self._analyze_model_requirements(model_name, training_config)
-
-             # Step 2: Filter compatible GPUs
-             compatible_gpus = self._filter_compatible_gpus(model_requirements)
-
-             # Step 3: Estimate costs and performance for each option
-             gpu_options = []
-             total_evaluated = 0
-             total_filtered = 0
-
-             for gpu_name in compatible_gpus:
-                 gpu_spec = self.gpus[gpu_name]
-
-                 # Get best provider for this GPU
-                 provider_options = self._get_provider_options(gpu_name, preferences)
-
-                 for provider_name, provider_spec, region, instance_type in provider_options:
-                     total_evaluated += 1
-                     option = self._evaluate_option(
-                         gpu_spec, provider_spec, region, instance_type,
-                         model_requirements, budget_limit, time_limit
-                     )
-
-                     if option:
-                         gpu_options.append(option)
-                     else:
-                         total_filtered += 1
-
-             # Step 4: Rank options by overall score
-             if not gpu_options:
-                 logger.warning(f"No compatible GPU options found. Evaluated {total_evaluated} options, {total_filtered} filtered by constraints.")
-                 logger.warning(f"Budget limit: {budget_limit}, Time limit: {time_limit}")
-                 raise ValueError("No compatible GPU options found")
-
-             gpu_options.sort(key=lambda x: x["score"], reverse=True)
-
-             # Step 5: Select best option
-             best_option = gpu_options[0]
-
-             # Step 6: Generate alternatives
-             alternatives = self._generate_alternatives(gpu_options[1:5])  # Top 5 alternatives
-
-             # Step 7: Create recommendation
-             recommendation = ResourceRecommendation(
-                 gpu=best_option["gpu"],
-                 cloud_provider=best_option["provider"],
-                 region=best_option["region"],
-                 instance_type=best_option["instance_type"],
-                 estimated_cost=best_option["cost"],
-                 cost_breakdown=best_option["cost_breakdown"],
-                 estimated_time=best_option["time"],
-                 performance_score=best_option["performance"],
-                 recommended_batch_size=best_option["batch_size"],
-                 recommended_precision=best_option["precision"],
-                 alternatives=alternatives,
-                 decision_factors=best_option["reasons"],
-                 confidence=best_option["confidence"]
-             )
-
-             logger.info(f"Selected {recommendation.gpu} on {recommendation.cloud_provider} "
-                         f"(${recommendation.estimated_cost:.2f}, {recommendation.estimated_time:.1f}h)")
-
-             return recommendation
-
-         except Exception as e:
-             logger.error(f"Resource optimization failed: {e}")
-             raise
-
-     def _analyze_model_requirements(self, model_name: str, training_config: Any) -> Dict[str, Any]:
-         """Analyze model resource requirements."""
-         requirements = {
-             "min_memory_gb": 8,
-             "recommended_memory_gb": 16,
-             "compute_intensity": "medium",  # "low", "medium", "high"
-             "precision": "fp16",
-             "batch_size": getattr(training_config, 'batch_size', 4),
-             "sequence_length": 1024,
-             "model_size_gb": 4.0,
-             "training_type": getattr(training_config, 'training_type', 'sft')
-         }
-
-         # Estimate model size and requirements based on name
-         if "2b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 6,  # Reduced for LoRA training
-                 "recommended_memory_gb": 10,
-                 "model_size_gb": 4.0,
-                 "compute_intensity": "medium"
-             })
-         elif "4b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 8,  # Reduced for LoRA training
-                 "recommended_memory_gb": 12,
-                 "model_size_gb": 8.0,
-                 "compute_intensity": "medium"
-             })
-         elif "7b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 12,  # Reduced for LoRA training
-                 "recommended_memory_gb": 16,
-                 "model_size_gb": 14.0,
-                 "compute_intensity": "high"
-             })
-         elif "13b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 20,  # Reduced for LoRA training
-                 "recommended_memory_gb": 32,
-                 "model_size_gb": 26.0,
-                 "compute_intensity": "high"
-             })
-
-         # Adjust for LoRA training (most training uses LoRA)
-         if hasattr(training_config, 'lora_config') and training_config.lora_config and training_config.lora_config.use_lora:
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * 0.8)
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * 0.9)
-         else:
-             # Assume LoRA by default for most efficient training
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * 0.8)
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * 0.9)
-
-         # Adjust for batch size
-         batch_size = requirements["batch_size"]
-         if batch_size > 4:
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * (1 + (batch_size - 4) * 0.15))
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * (1 + (batch_size - 4) * 0.15))
-
-         return requirements
-
-     def _filter_compatible_gpus(self, requirements: Dict[str, Any]) -> List[str]:
-         """Filter GPUs that meet the requirements."""
-         compatible = []
-
-         min_memory = requirements["min_memory_gb"]
-
-         for gpu_name, gpu_spec in self.gpus.items():
-             if gpu_spec.memory_gb >= min_memory:
-                 compatible.append(gpu_name)
-
-         return compatible
-
-     def _get_provider_options(self, gpu_name: str, preferences: Dict[str, Any]) -> List[Tuple[str, CloudProvider, str, str]]:
-         """Get provider options for a GPU."""
-         options = []
-         gpu_spec = self.gpus[gpu_name]
-
-         for provider_name in gpu_spec.providers:
-             if provider_name in self.cloud_providers:
-                 provider_spec = self.cloud_providers[provider_name]
-
-                 # Skip if not in user preferences
-                 if preferences.get("cloud") and provider_name not in preferences["cloud"]:
-                     continue
-
-                 # Get regions and instance types
-                 for region in provider_spec.regions[:2]:  # Limit to top 2 regions
-                     instance_type = f"{gpu_name.lower().replace(' ', '-')}-instance"
-                     options.append((provider_name, provider_spec, region, instance_type))
-
-         return options
-
-     def _evaluate_option(
-         self,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         region: str,
-         instance_type: str,
-         requirements: Dict[str, Any],
-         budget_limit: Optional[float],
-         time_limit: Optional[int]
-     ) -> Optional[Dict[str, Any]]:
-         """Evaluate a specific resource option."""
-
-         # Estimate training time (more realistic for LoRA training)
-         base_time = 3.0  # Base training time in hours for LoRA
-         time_factor = 1.0 / gpu_spec.training_efficiency
-
-         # Adjust base time for model size
-         model_size_gb = requirements.get("model_size_gb", 8.0)
-         if model_size_gb > 20:  # 13B+ models
-             base_time = 6.0
-         elif model_size_gb > 12:  # 7B models
-             base_time = 4.0
-         elif model_size_gb > 6:  # 4B models
-             base_time = 3.0
-         else:  # 2B models
-             base_time = 2.0
-
-         # Adjust for compute intensity
-         if requirements["compute_intensity"] == "high":
-             time_factor *= 1.3
-         elif requirements["compute_intensity"] == "low":
-             time_factor *= 0.8
-
-         # Adjust for training type (LoRA is much faster)
-         if requirements.get("training_type") == "sft":
-             time_factor *= 0.7  # LoRA SFT is typically faster
-
-         estimated_time = base_time * time_factor
-
-         # Estimate costs
-         compute_cost = gpu_spec.cost_per_hour * estimated_time
-         storage_cost = provider_spec.storage_cost_per_gb_hour * 100 * estimated_time  # Assume 100GB storage
-
-         total_cost = compute_cost + storage_cost
-
-         # Check constraints
-         if budget_limit and total_cost > budget_limit:
-             return None
-
-         if time_limit and estimated_time > time_limit:
-             return None
-
-         # Calculate performance score
-         performance_score = self._calculate_performance_score(gpu_spec, requirements)
-
-         # Calculate cost efficiency
-         cost_efficiency = performance_score / total_cost if total_cost > 0 else 0
-
-         # Calculate overall score
-         score = self._calculate_overall_score(
-             performance_score, cost_efficiency, gpu_spec, provider_spec, requirements
-         )
-
-         # Determine optimal batch size and precision
-         batch_size = self._determine_optimal_batch_size(gpu_spec, requirements)
-         precision = self._determine_optimal_precision(gpu_spec, requirements)
-
-         # Generate reasons
-         reasons = self._generate_option_reasons(gpu_spec, provider_spec, total_cost, estimated_time)
-
-         return {
-             "gpu": gpu_spec.name,
-             "provider": provider_spec.name,
-             "region": region,
-             "instance_type": instance_type,
-             "cost": total_cost,
-             "cost_breakdown": {
-                 "compute": compute_cost,
-                 "storage": storage_cost
-             },
-             "time": estimated_time,
-             "performance": performance_score,
-             "batch_size": batch_size,
-             "precision": precision,
-             "score": score,
-             "reasons": reasons,
-             "confidence": min(1.0, score / 100.0)
-         }
-
-     def _calculate_performance_score(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> float:
-         """Calculate performance score for a GPU."""
-         score = 0.0
-
-         # Memory adequacy
-         memory_ratio = gpu_spec.memory_gb / requirements["recommended_memory_gb"]
-         if memory_ratio >= 1.0:
-             score += 30
-         else:
-             score += memory_ratio * 30
-
-         # Compute performance
-         if requirements["precision"] == "fp16":
-             compute_score = min(30, gpu_spec.fp16_tflops / 100 * 30)
-         else:
-             compute_score = min(30, gpu_spec.fp32_tflops / 50 * 30)
-         score += compute_score
-
-         # Training efficiency
-         score += gpu_spec.training_efficiency * 20
-
-         # Memory bandwidth
-         bandwidth_score = min(20, gpu_spec.memory_bandwidth_gbps / 1000 * 20)
-         score += bandwidth_score
-
-         return score
-
-     def _calculate_overall_score(
-         self,
-         performance_score: float,
-         cost_efficiency: float,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         requirements: Dict[str, Any]
-     ) -> float:
-         """Calculate overall option score."""
-         score = 0.0
-
-         # Performance weight (40%)
-         score += performance_score * 0.4
-
-         # Cost efficiency weight (30%)
-         score += cost_efficiency * 30 * 0.3
-
-         # Availability weight (15%)
-         score += gpu_spec.availability_score * provider_spec.availability_score * 15
-
-         # Recommendation bonus (10%)
-         if gpu_spec.is_recommended:
-             score += 10
-
-         # Provider reliability (5%)
-         score += provider_spec.availability_score * 5
-
-         return score
-
-     def _determine_optimal_batch_size(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> int:
-         """Determine optimal batch size for GPU."""
-         base_batch_size = requirements["batch_size"]
-
-         # Adjust based on GPU memory
-         if gpu_spec.memory_gb >= 40:
-             return min(base_batch_size * 4, 16)
-         elif gpu_spec.memory_gb >= 24:
-             return min(base_batch_size * 2, 8)
-         elif gpu_spec.memory_gb >= 16:
-             return base_batch_size
-         else:
-             return max(1, base_batch_size // 2)
-
-     def _determine_optimal_precision(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> str:
-         """Determine optimal precision for GPU."""
-         # Prefer fp16 for modern GPUs with good fp16 performance
-         if gpu_spec.fp16_tflops > gpu_spec.fp32_tflops * 1.5:
-             return "fp16"
-         else:
-             return "fp32"
-
-     def _generate_option_reasons(
-         self,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         cost: float,
-         time: float
-     ) -> List[str]:
-         """Generate reasons for selecting this option."""
-         reasons = []
-
-         reasons.append(f"{gpu_spec.name} provides {gpu_spec.memory_gb}GB memory")
-
-         if gpu_spec.is_recommended:
-             reasons.append("Recommended GPU for this model type")
-
-         if cost < 50:
-             reasons.append("Cost-effective option")
-         elif cost < 100:
-             reasons.append("Moderate cost option")
-
-         if time < 5:
-             reasons.append("Fast training time")
-         elif time < 12:
-             reasons.append("Reasonable training time")
-
-         if provider_spec.availability_score > 0.95:
-             reasons.append("High availability provider")
-
-         return reasons
-
-     def _generate_alternatives(self, options: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-         """Generate alternative recommendations."""
-         alternatives = []
-
-         for option in options:
-             alt = {
-                 "gpu": option["gpu"],
-                 "provider": option["provider"],
-                 "cost": option["cost"],
-                 "time": option["time"],
-                 "performance": option["performance"],
-                 "reason": f"Alternative option with different cost/performance tradeoff"
-             }
-             alternatives.append(alt)
-
-         return alternatives
-
-     def _initialize_default_resources(self) -> None:
-         """Initialize with default GPU and cloud provider data."""
-         self._add_default_gpus()
-         self._add_default_cloud_providers()
-         self._save_resource_data()
-
-         logger.info("Initialized resource optimizer with default data")
-
-     def _add_default_gpus(self) -> None:
-         """Add default GPU specifications."""
-         gpus = [
-             GPUSpec(
-                 name="NVIDIA RTX A6000",
-                 memory_gb=48,
-                 compute_capability=8.6,
-                 fp16_tflops=150,
-                 fp32_tflops=38,
-                 memory_bandwidth_gbps=768,
-                 cost_per_hour=1.89,
-                 availability_score=0.8,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=1.0,
-                 power_efficiency=0.9,
-                 is_recommended=True,
-                 description="High-memory professional GPU ideal for large models"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 4090",
-                 memory_gb=24,
-                 compute_capability=8.9,
-                 fp16_tflops=165,
-                 fp32_tflops=83,
-                 memory_bandwidth_gbps=1008,
-                 cost_per_hour=1.25,
-                 availability_score=0.9,
-                 providers=["runpod", "vast"],
-                 training_efficiency=1.1,
-                 power_efficiency=1.0,
-                 is_recommended=True,
-                 description="Latest consumer GPU with excellent performance"
-             ),
-             GPUSpec(
-                 name="NVIDIA A100 40GB",
-                 memory_gb=40,
-                 compute_capability=8.0,
-                 fp16_tflops=312,
-                 fp32_tflops=19.5,
-                 memory_bandwidth_gbps=1555,
-                 cost_per_hour=2.95,
-                 availability_score=0.7,
-                 providers=["runpod", "aws", "gcp"],
-                 training_efficiency=1.2,
-                 power_efficiency=1.1,
-                 is_recommended=True,
-                 description="Data center GPU optimized for AI training"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 3090",
-                 memory_gb=24,
-                 compute_capability=8.6,
-                 fp16_tflops=142,
-                 fp32_tflops=35.6,
-                 memory_bandwidth_gbps=936,
-                 cost_per_hour=0.89,
-                 availability_score=0.95,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=0.9,
-                 power_efficiency=0.8,
-                 is_recommended=False,
-                 description="Previous generation high-memory consumer GPU"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 4080",
-                 memory_gb=16,
-                 compute_capability=8.9,
-                 fp16_tflops=120,
-                 fp32_tflops=48.7,
-                 memory_bandwidth_gbps=716,
-                 cost_per_hour=0.95,
-                 availability_score=0.85,
-                 providers=["runpod", "vast"],
-                 training_efficiency=1.0,
-                 power_efficiency=1.0,
-                 is_recommended=False,
-                 description="Mid-range modern GPU for smaller models"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 3080",
-                 memory_gb=10,
-                 compute_capability=8.6,
-                 fp16_tflops=119,
-                 fp32_tflops=29.8,
-                 memory_bandwidth_gbps=760,
-                 cost_per_hour=0.55,
-                 availability_score=0.9,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=0.8,
-                 power_efficiency=0.8,
-                 is_recommended=False,
-                 description="Budget-friendly option for small models"
-             )
-         ]
-
-         for gpu in gpus:
-             self.gpus[gpu.name] = gpu
-
-     def _add_default_cloud_providers(self) -> None:
-         """Add default cloud provider specifications."""
-         providers = [
-             CloudProvider(
-                 name="runpod",
-                 regions=["US-East", "US-West", "EU-West"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 4090", "NVIDIA A100 40GB", "NVIDIA RTX 3090", "NVIDIA RTX 4080", "NVIDIA RTX 3080"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=False,
-                 supports_preemption=True,
-                 startup_time_minutes=2.0,
-                 network_performance="high",
-                 availability_score=0.95,
-                 storage_cost_per_gb_hour=0.0002,
-                 egress_cost_per_gb=0.02,
-                 description="Specialized GPU cloud for AI/ML workloads"
-             ),
-             CloudProvider(
-                 name="vast",
-                 regions=["Global"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 4090", "NVIDIA RTX 3090", "NVIDIA RTX 4080", "NVIDIA RTX 3080"],
-                 pricing_model="spot",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=False,
-                 supports_preemption=True,
-                 startup_time_minutes=3.0,
-                 network_performance="standard",
-                 availability_score=0.85,
-                 storage_cost_per_gb_hour=0.0001,
-                 egress_cost_per_gb=0.01,
-                 description="Decentralized GPU marketplace with competitive pricing"
-             ),
-             CloudProvider(
-                 name="lambda",
-                 regions=["US-East", "US-West"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 3090", "NVIDIA RTX 3080"],
-                 pricing_model="hourly",
-                 supports_spot_instances=False,
-                 supports_auto_scaling=True,
-                 supports_preemption=False,
-                 startup_time_minutes=1.0,
-                 network_performance="high",
-                 availability_score=0.98,
-                 storage_cost_per_gb_hour=0.0003,
-                 egress_cost_per_gb=0.05,
-                 description="Premium GPU cloud with high reliability"
-             ),
-             CloudProvider(
-                 name="aws",
-                 regions=["us-east-1", "us-west-2", "eu-west-1"],
-                 available_gpus=["NVIDIA A100 40GB"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=True,
-                 supports_preemption=True,
-                 startup_time_minutes=5.0,
-                 network_performance="high",
-                 availability_score=0.99,
-                 storage_cost_per_gb_hour=0.0005,
-                 egress_cost_per_gb=0.09,
-                 description="Enterprise cloud with comprehensive services"
-             ),
-             CloudProvider(
-                 name="gcp",
-                 regions=["us-central1", "us-east1", "europe-west1"],
-                 available_gpus=["NVIDIA A100 40GB"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=True,
-                 supports_preemption=True,
-                 startup_time_minutes=4.0,
-                 network_performance="high",
-                 availability_score=0.99,
-                 storage_cost_per_gb_hour=0.0004,
-                 egress_cost_per_gb=0.08,
-                 description="Google's cloud platform with AI/ML focus"
-             )
-         ]
-
-         for provider in providers:
-             self.cloud_providers[provider.name] = provider
-
-     def _load_resource_data(self) -> None:
-         """Load resource data from disk."""
-         try:
-             self._load_gpus()
-             self._load_cloud_providers()
-         except Exception as e:
-             logger.warning(f"Failed to load resource data: {e}")
-
-     def _save_resource_data(self) -> None:
-         """Save resource data to disk."""
-         try:
-             self._save_gpus()
-             self._save_cloud_providers()
-         except Exception as e:
-             logger.error(f"Failed to save resource data: {e}")
-
-     def _load_gpus(self) -> None:
-         """Load GPU data from disk."""
-         gpus_file = os.path.join(self.data_dir, "gpus.json")
-         if os.path.exists(gpus_file):
-             with open(gpus_file, 'r') as f:
-                 data = json.load(f)
-                 for name, gpu_data in data.items():
-                     self.gpus[name] = GPUSpec(**gpu_data)
-
-     def _save_gpus(self) -> None:
-         """Save GPU data to disk."""
-         gpus_file = os.path.join(self.data_dir, "gpus.json")
-         with open(gpus_file, 'w') as f:
-             from dataclasses import asdict
-             data = {name: asdict(gpu) for name, gpu in self.gpus.items()}
-             json.dump(data, f, indent=2)
-
-     def _load_cloud_providers(self) -> None:
-         """Load cloud provider data from disk."""
-         providers_file = os.path.join(self.data_dir, "cloud_providers.json")
-         if os.path.exists(providers_file):
-             with open(providers_file, 'r') as f:
-                 data = json.load(f)
-                 for name, provider_data in data.items():
-                     self.cloud_providers[name] = CloudProvider(**provider_data)
-
-     def _save_cloud_providers(self) -> None:
-         """Save cloud provider data to disk."""
-         providers_file = os.path.join(self.data_dir, "cloud_providers.json")
-         with open(providers_file, 'w') as f:
-             from dataclasses import asdict
-             data = {name: asdict(provider) for name, provider in self.cloud_providers.items()}
-             json.dump(data, f, indent=2)
-
-     def get_available_gpus(self) -> List[str]:
-         """Get list of available GPU types."""
-         return list(self.gpus.keys())
-
-     def get_available_providers(self) -> List[str]:
-         """Get list of available cloud providers."""
-         return list(self.cloud_providers.keys())
-
-     def estimate_cost(self, gpu_name: str, provider_name: str, hours: float) -> float:
-         """Estimate cost for specific GPU and provider."""
-         if gpu_name in self.gpus and provider_name in self.cloud_providers:
-             gpu_spec = self.gpus[gpu_name]
-             provider_spec = self.cloud_providers[provider_name]
-
-             compute_cost = gpu_spec.cost_per_hour * hours
-             storage_cost = provider_spec.storage_cost_per_gb_hour * 100 * hours  # Assume 100GB
-
-             return compute_cost + storage_cost
-
-         return 0.0
-
-     def get_statistics(self) -> Dict[str, Any]:
-         """Get resource optimizer statistics."""
-         return {
-             "total_gpus": len(self.gpus),
-             "total_providers": len(self.cloud_providers),
-             "avg_gpu_memory": sum(gpu.memory_gb for gpu in self.gpus.values()) / len(self.gpus) if self.gpus else 0,
-             "avg_cost_per_hour": sum(gpu.cost_per_hour for gpu in self.gpus.values()) / len(self.gpus) if self.gpus else 0,
-             "recommended_gpus": len([gpu for gpu in self.gpus.values() if gpu.is_recommended])
-         }