isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/training/intelligent/resource_optimizer.py
@@ -1,839 +0,0 @@
- """
- Resource Optimization System for Training
-
- This module provides intelligent resource selection and cost optimization:
- - GPU type selection based on model requirements
- - Cloud provider comparison and selection
- - Cost estimation and budget optimization
- - Performance prediction and time estimation
- - Resource availability monitoring
-
- Optimizes for cost, performance, and availability based on user constraints.
- """
-
- import logging
- from typing import Dict, List, Optional, Any, Tuple
- from dataclasses import dataclass, field
- from datetime import datetime
- import json
- import os
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class GPUSpec:
-     """GPU specification and characteristics."""
-
-     name: str
-     memory_gb: int
-     compute_capability: float
-
-     # Performance characteristics
-     fp16_tflops: float
-     fp32_tflops: float
-     memory_bandwidth_gbps: float
-
-     # Cost (per hour in USD)
-     cost_per_hour: float = 0.0
-
-     # Availability
-     availability_score: float = 1.0  # 0.0 to 1.0
-
-     # Provider information
-     providers: List[str] = field(default_factory=list)
-
-     # Training characteristics
-     training_efficiency: float = 1.0  # Relative efficiency for training
-     power_efficiency: float = 1.0  # Performance per watt
-
-     # Metadata
-     is_recommended: bool = False
-     description: str = ""
-
-
- @dataclass
- class CloudProvider:
-     """Cloud provider specification."""
-
-     name: str
-     regions: List[str]
-
-     # Available GPU types
-     available_gpus: List[str]
-
-     # Pricing model
-     pricing_model: str = "hourly"  # "hourly", "spot", "reserved"
-
-     # Features
-     supports_spot_instances: bool = False
-     supports_auto_scaling: bool = False
-     supports_preemption: bool = False
-
-     # Performance characteristics
-     startup_time_minutes: float = 5.0
-     network_performance: str = "standard"  # "low", "standard", "high"
-
-     # Reliability
-     availability_score: float = 0.99
-
-     # Additional costs
-     storage_cost_per_gb_hour: float = 0.0
-     egress_cost_per_gb: float = 0.0
-
-     description: str = ""
-
-
- @dataclass
- class ResourceRecommendation:
-     """Resource optimization recommendation."""
-
-     # Selected resources
-     gpu: str
-     cloud_provider: str
-     region: str
-     instance_type: str
-
-     # Cost estimates
-     estimated_cost: float
-     cost_breakdown: Dict[str, float]
-
-     # Performance estimates
-     estimated_time: float  # hours
-     performance_score: float
-
-     # Configuration
-     recommended_batch_size: int
-     recommended_precision: str  # "fp16", "fp32", "bf16"
-
-     # Alternatives
-     alternatives: List[Dict[str, Any]]
-
-     # Reasoning
-     decision_factors: List[str]
-     confidence: float
-
-     # Metadata
-     created_at: datetime = field(default_factory=datetime.now)
-
-
- class ResourceOptimizer:
-     """
-     Intelligent resource optimization system.
-
-     This class analyzes training requirements and recommends optimal resources:
-     - GPU selection based on model size and requirements
-     - Cloud provider comparison for cost and performance
-     - Cost estimation and budget optimization
-     - Performance prediction and time estimation
-
-     Example:
-         ```python
-         optimizer = ResourceOptimizer()
-
-         recommendation = optimizer.optimize_resources(
-             model_name="google/gemma-2-7b-it",
-             training_config=config,
-             budget_limit=100.0,
-             time_limit=8
-         )
-
-         print(f"Recommended: {recommendation.gpu} on {recommendation.cloud_provider}")
-         print(f"Cost: ${recommendation.estimated_cost:.2f}")
-         ```
-     """
-
-     def __init__(self, data_dir: Optional[str] = None):
-         """
-         Initialize resource optimizer.
-
-         Args:
-             data_dir: Directory for storing resource data
-         """
-         self.data_dir = data_dir or os.path.join(os.getcwd(), "resource_data")
-         os.makedirs(self.data_dir, exist_ok=True)
-
-         # Initialize resource databases
-         self.gpus: Dict[str, GPUSpec] = {}
-         self.cloud_providers: Dict[str, CloudProvider] = {}
-         self.pricing_cache: Dict[str, Dict[str, float]] = {}
-
-         # Load resource data
-         self._load_resource_data()
-
-         # Initialize with defaults if empty
-         if not self.gpus:
-             self._initialize_default_resources()
-
-         logger.info(f"Resource optimizer initialized with {len(self.gpus)} GPUs and {len(self.cloud_providers)} providers")
-
-     def optimize_resources(
-         self,
-         model_name: str,
-         training_config: Any,
-         budget_limit: Optional[float] = None,
-         time_limit: Optional[int] = None,
-         preferences: Optional[Dict[str, Any]] = None
-     ) -> ResourceRecommendation:
-         """
-         Optimize resource selection for training requirements.
-
-         Args:
-             model_name: Name of the model to train
-             training_config: Training configuration
-             budget_limit: Maximum budget in USD
-             time_limit: Maximum time in hours
-             preferences: User preferences for GPU/cloud providers
-
-         Returns:
-             Optimal resource recommendation
-         """
-         preferences = preferences or {}
-
-         logger.info(f"Optimizing resources for {model_name}")
-
-         try:
-             # Step 1: Analyze model requirements
-             model_requirements = self._analyze_model_requirements(model_name, training_config)
-
-             # Step 2: Filter compatible GPUs
-             compatible_gpus = self._filter_compatible_gpus(model_requirements)
-
-             # Step 3: Estimate costs and performance for each option
-             gpu_options = []
-             total_evaluated = 0
-             total_filtered = 0
-
-             for gpu_name in compatible_gpus:
-                 gpu_spec = self.gpus[gpu_name]
-
-                 # Get best provider for this GPU
-                 provider_options = self._get_provider_options(gpu_name, preferences)
-
-                 for provider_name, provider_spec, region, instance_type in provider_options:
-                     total_evaluated += 1
-                     option = self._evaluate_option(
-                         gpu_spec, provider_spec, region, instance_type,
-                         model_requirements, budget_limit, time_limit
-                     )
-
-                     if option:
-                         gpu_options.append(option)
-                     else:
-                         total_filtered += 1
-
-             # Step 4: Rank options by overall score
-             if not gpu_options:
-                 logger.warning(f"No compatible GPU options found. Evaluated {total_evaluated} options, {total_filtered} filtered by constraints.")
-                 logger.warning(f"Budget limit: {budget_limit}, Time limit: {time_limit}")
-                 raise ValueError("No compatible GPU options found")
-
-             gpu_options.sort(key=lambda x: x["score"], reverse=True)
-
-             # Step 5: Select best option
-             best_option = gpu_options[0]
-
-             # Step 6: Generate alternatives
-             alternatives = self._generate_alternatives(gpu_options[1:5])  # Top 5 alternatives
-
-             # Step 7: Create recommendation
-             recommendation = ResourceRecommendation(
-                 gpu=best_option["gpu"],
-                 cloud_provider=best_option["provider"],
-                 region=best_option["region"],
-                 instance_type=best_option["instance_type"],
-                 estimated_cost=best_option["cost"],
-                 cost_breakdown=best_option["cost_breakdown"],
-                 estimated_time=best_option["time"],
-                 performance_score=best_option["performance"],
-                 recommended_batch_size=best_option["batch_size"],
-                 recommended_precision=best_option["precision"],
-                 alternatives=alternatives,
-                 decision_factors=best_option["reasons"],
-                 confidence=best_option["confidence"]
-             )
-
-             logger.info(f"Selected {recommendation.gpu} on {recommendation.cloud_provider} "
-                         f"(${recommendation.estimated_cost:.2f}, {recommendation.estimated_time:.1f}h)")
-
-             return recommendation
-
-         except Exception as e:
-             logger.error(f"Resource optimization failed: {e}")
-             raise
-
-     def _analyze_model_requirements(self, model_name: str, training_config: Any) -> Dict[str, Any]:
-         """Analyze model resource requirements."""
-         requirements = {
-             "min_memory_gb": 8,
-             "recommended_memory_gb": 16,
-             "compute_intensity": "medium",  # "low", "medium", "high"
-             "precision": "fp16",
-             "batch_size": getattr(training_config, 'batch_size', 4),
-             "sequence_length": 1024,
-             "model_size_gb": 4.0,
-             "training_type": getattr(training_config, 'training_type', 'sft')
-         }
-
-         # Estimate model size and requirements based on name
-         if "2b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 6,  # Reduced for LoRA training
-                 "recommended_memory_gb": 10,
-                 "model_size_gb": 4.0,
-                 "compute_intensity": "medium"
-             })
-         elif "4b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 8,  # Reduced for LoRA training
-                 "recommended_memory_gb": 12,
-                 "model_size_gb": 8.0,
-                 "compute_intensity": "medium"
-             })
-         elif "7b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 12,  # Reduced for LoRA training
-                 "recommended_memory_gb": 16,
-                 "model_size_gb": 14.0,
-                 "compute_intensity": "high"
-             })
-         elif "13b" in model_name.lower():
-             requirements.update({
-                 "min_memory_gb": 20,  # Reduced for LoRA training
-                 "recommended_memory_gb": 32,
-                 "model_size_gb": 26.0,
-                 "compute_intensity": "high"
-             })
-
-         # Adjust for LoRA training (most training uses LoRA)
-         if hasattr(training_config, 'lora_config') and training_config.lora_config and training_config.lora_config.use_lora:
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * 0.8)
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * 0.9)
-         else:
-             # Assume LoRA by default for most efficient training
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * 0.8)
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * 0.9)
-
-         # Adjust for batch size
-         batch_size = requirements["batch_size"]
-         if batch_size > 4:
-             requirements["min_memory_gb"] = int(requirements["min_memory_gb"] * (1 + (batch_size - 4) * 0.15))
-             requirements["recommended_memory_gb"] = int(requirements["recommended_memory_gb"] * (1 + (batch_size - 4) * 0.15))
-
-         return requirements
-
-     def _filter_compatible_gpus(self, requirements: Dict[str, Any]) -> List[str]:
-         """Filter GPUs that meet the requirements."""
-         compatible = []
-
-         min_memory = requirements["min_memory_gb"]
-
-         for gpu_name, gpu_spec in self.gpus.items():
-             if gpu_spec.memory_gb >= min_memory:
-                 compatible.append(gpu_name)
-
-         return compatible
-
-     def _get_provider_options(self, gpu_name: str, preferences: Dict[str, Any]) -> List[Tuple[str, CloudProvider, str, str]]:
-         """Get provider options for a GPU."""
-         options = []
-         gpu_spec = self.gpus[gpu_name]
-
-         for provider_name in gpu_spec.providers:
-             if provider_name in self.cloud_providers:
-                 provider_spec = self.cloud_providers[provider_name]
-
-                 # Skip if not in user preferences
-                 if preferences.get("cloud") and provider_name not in preferences["cloud"]:
-                     continue
-
-                 # Get regions and instance types
-                 for region in provider_spec.regions[:2]:  # Limit to top 2 regions
-                     instance_type = f"{gpu_name.lower().replace(' ', '-')}-instance"
-                     options.append((provider_name, provider_spec, region, instance_type))
-
-         return options
-
-     def _evaluate_option(
-         self,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         region: str,
-         instance_type: str,
-         requirements: Dict[str, Any],
-         budget_limit: Optional[float],
-         time_limit: Optional[int]
-     ) -> Optional[Dict[str, Any]]:
-         """Evaluate a specific resource option."""
-
-         # Estimate training time (more realistic for LoRA training)
-         base_time = 3.0  # Base training time in hours for LoRA
-         time_factor = 1.0 / gpu_spec.training_efficiency
-
-         # Adjust base time for model size
-         model_size_gb = requirements.get("model_size_gb", 8.0)
-         if model_size_gb > 20:  # 13B+ models
-             base_time = 6.0
-         elif model_size_gb > 12:  # 7B models
-             base_time = 4.0
-         elif model_size_gb > 6:  # 4B models
-             base_time = 3.0
-         else:  # 2B models
-             base_time = 2.0
-
-         # Adjust for compute intensity
-         if requirements["compute_intensity"] == "high":
-             time_factor *= 1.3
-         elif requirements["compute_intensity"] == "low":
-             time_factor *= 0.8
-
-         # Adjust for training type (LoRA is much faster)
-         if requirements.get("training_type") == "sft":
-             time_factor *= 0.7  # LoRA SFT is typically faster
-
-         estimated_time = base_time * time_factor
-
-         # Estimate costs
-         compute_cost = gpu_spec.cost_per_hour * estimated_time
-         storage_cost = provider_spec.storage_cost_per_gb_hour * 100 * estimated_time  # Assume 100GB storage
-
-         total_cost = compute_cost + storage_cost
-
-         # Check constraints
-         if budget_limit and total_cost > budget_limit:
-             return None
-
-         if time_limit and estimated_time > time_limit:
-             return None
-
-         # Calculate performance score
-         performance_score = self._calculate_performance_score(gpu_spec, requirements)
-
-         # Calculate cost efficiency
-         cost_efficiency = performance_score / total_cost if total_cost > 0 else 0
-
-         # Calculate overall score
-         score = self._calculate_overall_score(
-             performance_score, cost_efficiency, gpu_spec, provider_spec, requirements
-         )
-
-         # Determine optimal batch size and precision
-         batch_size = self._determine_optimal_batch_size(gpu_spec, requirements)
-         precision = self._determine_optimal_precision(gpu_spec, requirements)
-
-         # Generate reasons
-         reasons = self._generate_option_reasons(gpu_spec, provider_spec, total_cost, estimated_time)
-
-         return {
-             "gpu": gpu_spec.name,
-             "provider": provider_spec.name,
-             "region": region,
-             "instance_type": instance_type,
-             "cost": total_cost,
-             "cost_breakdown": {
-                 "compute": compute_cost,
-                 "storage": storage_cost
-             },
-             "time": estimated_time,
-             "performance": performance_score,
-             "batch_size": batch_size,
-             "precision": precision,
-             "score": score,
-             "reasons": reasons,
-             "confidence": min(1.0, score / 100.0)
-         }
-
-     def _calculate_performance_score(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> float:
-         """Calculate performance score for a GPU."""
-         score = 0.0
-
-         # Memory adequacy
-         memory_ratio = gpu_spec.memory_gb / requirements["recommended_memory_gb"]
-         if memory_ratio >= 1.0:
-             score += 30
-         else:
-             score += memory_ratio * 30
-
-         # Compute performance
-         if requirements["precision"] == "fp16":
-             compute_score = min(30, gpu_spec.fp16_tflops / 100 * 30)
-         else:
-             compute_score = min(30, gpu_spec.fp32_tflops / 50 * 30)
-         score += compute_score
-
-         # Training efficiency
-         score += gpu_spec.training_efficiency * 20
-
-         # Memory bandwidth
-         bandwidth_score = min(20, gpu_spec.memory_bandwidth_gbps / 1000 * 20)
-         score += bandwidth_score
-
-         return score
-
-     def _calculate_overall_score(
-         self,
-         performance_score: float,
-         cost_efficiency: float,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         requirements: Dict[str, Any]
-     ) -> float:
-         """Calculate overall option score."""
-         score = 0.0
-
-         # Performance weight (40%)
-         score += performance_score * 0.4
-
-         # Cost efficiency weight (30%)
-         score += cost_efficiency * 30 * 0.3
-
-         # Availability weight (15%)
-         score += gpu_spec.availability_score * provider_spec.availability_score * 15
-
-         # Recommendation bonus (10%)
-         if gpu_spec.is_recommended:
-             score += 10
-
-         # Provider reliability (5%)
-         score += provider_spec.availability_score * 5
-
-         return score
-
-     def _determine_optimal_batch_size(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> int:
-         """Determine optimal batch size for GPU."""
-         base_batch_size = requirements["batch_size"]
-
-         # Adjust based on GPU memory
-         if gpu_spec.memory_gb >= 40:
-             return min(base_batch_size * 4, 16)
-         elif gpu_spec.memory_gb >= 24:
-             return min(base_batch_size * 2, 8)
-         elif gpu_spec.memory_gb >= 16:
-             return base_batch_size
-         else:
-             return max(1, base_batch_size // 2)
-
-     def _determine_optimal_precision(self, gpu_spec: GPUSpec, requirements: Dict[str, Any]) -> str:
-         """Determine optimal precision for GPU."""
-         # Prefer fp16 for modern GPUs with good fp16 performance
-         if gpu_spec.fp16_tflops > gpu_spec.fp32_tflops * 1.5:
-             return "fp16"
-         else:
-             return "fp32"
-
-     def _generate_option_reasons(
-         self,
-         gpu_spec: GPUSpec,
-         provider_spec: CloudProvider,
-         cost: float,
-         time: float
-     ) -> List[str]:
-         """Generate reasons for selecting this option."""
-         reasons = []
-
-         reasons.append(f"{gpu_spec.name} provides {gpu_spec.memory_gb}GB memory")
-
-         if gpu_spec.is_recommended:
-             reasons.append("Recommended GPU for this model type")
-
-         if cost < 50:
-             reasons.append("Cost-effective option")
-         elif cost < 100:
-             reasons.append("Moderate cost option")
-
-         if time < 5:
-             reasons.append("Fast training time")
-         elif time < 12:
-             reasons.append("Reasonable training time")
-
-         if provider_spec.availability_score > 0.95:
-             reasons.append("High availability provider")
-
-         return reasons
-
-     def _generate_alternatives(self, options: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-         """Generate alternative recommendations."""
-         alternatives = []
-
-         for option in options:
-             alt = {
-                 "gpu": option["gpu"],
-                 "provider": option["provider"],
-                 "cost": option["cost"],
-                 "time": option["time"],
-                 "performance": option["performance"],
-                 "reason": f"Alternative option with different cost/performance tradeoff"
-             }
-             alternatives.append(alt)
-
-         return alternatives
-
-     def _initialize_default_resources(self) -> None:
-         """Initialize with default GPU and cloud provider data."""
-         self._add_default_gpus()
-         self._add_default_cloud_providers()
-         self._save_resource_data()
-
-         logger.info("Initialized resource optimizer with default data")
-
-     def _add_default_gpus(self) -> None:
-         """Add default GPU specifications."""
-         gpus = [
-             GPUSpec(
-                 name="NVIDIA RTX A6000",
-                 memory_gb=48,
-                 compute_capability=8.6,
-                 fp16_tflops=150,
-                 fp32_tflops=38,
-                 memory_bandwidth_gbps=768,
-                 cost_per_hour=1.89,
-                 availability_score=0.8,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=1.0,
-                 power_efficiency=0.9,
-                 is_recommended=True,
-                 description="High-memory professional GPU ideal for large models"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 4090",
-                 memory_gb=24,
-                 compute_capability=8.9,
-                 fp16_tflops=165,
-                 fp32_tflops=83,
-                 memory_bandwidth_gbps=1008,
-                 cost_per_hour=1.25,
-                 availability_score=0.9,
-                 providers=["runpod", "vast"],
-                 training_efficiency=1.1,
-                 power_efficiency=1.0,
-                 is_recommended=True,
-                 description="Latest consumer GPU with excellent performance"
-             ),
-             GPUSpec(
-                 name="NVIDIA A100 40GB",
-                 memory_gb=40,
-                 compute_capability=8.0,
-                 fp16_tflops=312,
-                 fp32_tflops=19.5,
-                 memory_bandwidth_gbps=1555,
-                 cost_per_hour=2.95,
-                 availability_score=0.7,
-                 providers=["runpod", "aws", "gcp"],
-                 training_efficiency=1.2,
-                 power_efficiency=1.1,
-                 is_recommended=True,
-                 description="Data center GPU optimized for AI training"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 3090",
-                 memory_gb=24,
-                 compute_capability=8.6,
-                 fp16_tflops=142,
-                 fp32_tflops=35.6,
-                 memory_bandwidth_gbps=936,
-                 cost_per_hour=0.89,
-                 availability_score=0.95,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=0.9,
-                 power_efficiency=0.8,
-                 is_recommended=False,
-                 description="Previous generation high-memory consumer GPU"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 4080",
-                 memory_gb=16,
-                 compute_capability=8.9,
-                 fp16_tflops=120,
-                 fp32_tflops=48.7,
-                 memory_bandwidth_gbps=716,
-                 cost_per_hour=0.95,
-                 availability_score=0.85,
-                 providers=["runpod", "vast"],
-                 training_efficiency=1.0,
-                 power_efficiency=1.0,
-                 is_recommended=False,
-                 description="Mid-range modern GPU for smaller models"
-             ),
-             GPUSpec(
-                 name="NVIDIA RTX 3080",
-                 memory_gb=10,
-                 compute_capability=8.6,
-                 fp16_tflops=119,
-                 fp32_tflops=29.8,
-                 memory_bandwidth_gbps=760,
-                 cost_per_hour=0.55,
-                 availability_score=0.9,
-                 providers=["runpod", "vast", "lambda"],
-                 training_efficiency=0.8,
-                 power_efficiency=0.8,
-                 is_recommended=False,
-                 description="Budget-friendly option for small models"
-             )
-         ]
-
-         for gpu in gpus:
-             self.gpus[gpu.name] = gpu
-
-     def _add_default_cloud_providers(self) -> None:
-         """Add default cloud provider specifications."""
-         providers = [
-             CloudProvider(
-                 name="runpod",
-                 regions=["US-East", "US-West", "EU-West"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 4090", "NVIDIA A100 40GB", "NVIDIA RTX 3090", "NVIDIA RTX 4080", "NVIDIA RTX 3080"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=False,
-                 supports_preemption=True,
-                 startup_time_minutes=2.0,
-                 network_performance="high",
-                 availability_score=0.95,
-                 storage_cost_per_gb_hour=0.0002,
-                 egress_cost_per_gb=0.02,
-                 description="Specialized GPU cloud for AI/ML workloads"
-             ),
-             CloudProvider(
-                 name="vast",
-                 regions=["Global"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 4090", "NVIDIA RTX 3090", "NVIDIA RTX 4080", "NVIDIA RTX 3080"],
-                 pricing_model="spot",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=False,
-                 supports_preemption=True,
-                 startup_time_minutes=3.0,
-                 network_performance="standard",
-                 availability_score=0.85,
-                 storage_cost_per_gb_hour=0.0001,
-                 egress_cost_per_gb=0.01,
-                 description="Decentralized GPU marketplace with competitive pricing"
-             ),
-             CloudProvider(
-                 name="lambda",
-                 regions=["US-East", "US-West"],
-                 available_gpus=["NVIDIA RTX A6000", "NVIDIA RTX 3090", "NVIDIA RTX 3080"],
-                 pricing_model="hourly",
-                 supports_spot_instances=False,
-                 supports_auto_scaling=True,
-                 supports_preemption=False,
-                 startup_time_minutes=1.0,
-                 network_performance="high",
-                 availability_score=0.98,
-                 storage_cost_per_gb_hour=0.0003,
-                 egress_cost_per_gb=0.05,
-                 description="Premium GPU cloud with high reliability"
-             ),
-             CloudProvider(
-                 name="aws",
-                 regions=["us-east-1", "us-west-2", "eu-west-1"],
-                 available_gpus=["NVIDIA A100 40GB"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=True,
-                 supports_preemption=True,
-                 startup_time_minutes=5.0,
-                 network_performance="high",
-                 availability_score=0.99,
-                 storage_cost_per_gb_hour=0.0005,
-                 egress_cost_per_gb=0.09,
-                 description="Enterprise cloud with comprehensive services"
-             ),
-             CloudProvider(
-                 name="gcp",
-                 regions=["us-central1", "us-east1", "europe-west1"],
-                 available_gpus=["NVIDIA A100 40GB"],
-                 pricing_model="hourly",
-                 supports_spot_instances=True,
-                 supports_auto_scaling=True,
-                 supports_preemption=True,
-                 startup_time_minutes=4.0,
-                 network_performance="high",
-                 availability_score=0.99,
-                 storage_cost_per_gb_hour=0.0004,
-                 egress_cost_per_gb=0.08,
-                 description="Google's cloud platform with AI/ML focus"
-             )
-         ]
-
-         for provider in providers:
-             self.cloud_providers[provider.name] = provider
-
-     def _load_resource_data(self) -> None:
-         """Load resource data from disk."""
-         try:
-             self._load_gpus()
-             self._load_cloud_providers()
-         except Exception as e:
-             logger.warning(f"Failed to load resource data: {e}")
-
-     def _save_resource_data(self) -> None:
-         """Save resource data to disk."""
-         try:
-             self._save_gpus()
-             self._save_cloud_providers()
-         except Exception as e:
-             logger.error(f"Failed to save resource data: {e}")
-
-     def _load_gpus(self) -> None:
-         """Load GPU data from disk."""
-         gpus_file = os.path.join(self.data_dir, "gpus.json")
-         if os.path.exists(gpus_file):
-             with open(gpus_file, 'r') as f:
-                 data = json.load(f)
-                 for name, gpu_data in data.items():
-                     self.gpus[name] = GPUSpec(**gpu_data)
-
-     def _save_gpus(self) -> None:
-         """Save GPU data to disk."""
-         gpus_file = os.path.join(self.data_dir, "gpus.json")
-         with open(gpus_file, 'w') as f:
-             from dataclasses import asdict
-             data = {name: asdict(gpu) for name, gpu in self.gpus.items()}
-             json.dump(data, f, indent=2)
-
-     def _load_cloud_providers(self) -> None:
-         """Load cloud provider data from disk."""
-         providers_file = os.path.join(self.data_dir, "cloud_providers.json")
-         if os.path.exists(providers_file):
-             with open(providers_file, 'r') as f:
-                 data = json.load(f)
-                 for name, provider_data in data.items():
-                     self.cloud_providers[name] = CloudProvider(**provider_data)
-
-     def _save_cloud_providers(self) -> None:
-         """Save cloud provider data to disk."""
-         providers_file = os.path.join(self.data_dir, "cloud_providers.json")
-         with open(providers_file, 'w') as f:
-             from dataclasses import asdict
-             data = {name: asdict(provider) for name, provider in self.cloud_providers.items()}
-             json.dump(data, f, indent=2)
-
-     def get_available_gpus(self) -> List[str]:
-         """Get list of available GPU types."""
-         return list(self.gpus.keys())
-
-     def get_available_providers(self) -> List[str]:
-         """Get list of available cloud providers."""
-         return list(self.cloud_providers.keys())
-
-     def estimate_cost(self, gpu_name: str, provider_name: str, hours: float) -> float:
-         """Estimate cost for specific GPU and provider."""
-         if gpu_name in self.gpus and provider_name in self.cloud_providers:
-             gpu_spec = self.gpus[gpu_name]
-             provider_spec = self.cloud_providers[provider_name]
-
-             compute_cost = gpu_spec.cost_per_hour * hours
-             storage_cost = provider_spec.storage_cost_per_gb_hour * 100 * hours  # Assume 100GB
-
-             return compute_cost + storage_cost
-
-         return 0.0
-
-     def get_statistics(self) -> Dict[str, Any]:
-         """Get resource optimizer statistics."""
-         return {
-             "total_gpus": len(self.gpus),
-             "total_providers": len(self.cloud_providers),
-             "avg_gpu_memory": sum(gpu.memory_gb for gpu in self.gpus.values()) / len(self.gpus) if self.gpus else 0,
-             "avg_cost_per_hour": sum(gpu.cost_per_hour for gpu in self.gpus.values()) / len(self.gpus) if self.gpus else 0,
-             "recommended_gpus": len([gpu for gpu in self.gpus.values() if gpu.is_recommended])
-         }