isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -12,11 +12,9 @@ import logging
12
12
  import asyncio
13
13
  from typing import Dict, Any
14
14
  import json
15
- import psycopg2
16
15
  import os
17
16
 
18
17
  from ...core.config.config_manager import ConfigManager
19
- from ...core.database.migrations import run_environment_migrations
20
18
  from ...core.models.model_repo import ModelRegistry
21
19
  from ...core.types import ModelType, ModelCapability
22
20
 
@@ -27,67 +25,59 @@ class StartupInitializer:
27
25
 
28
26
  def __init__(self):
29
27
  self.config_manager = ConfigManager()
28
+ self._embedding_service = None
29
+ self._model_registry = None
30
30
 
31
31
  async def initialize_system(self):
32
32
  """Run complete system initialization"""
33
- logger.info("🚀 Starting ISA Model system initialization...")
34
-
33
+ print("🚀 Starting ISA Model system initialization...")
34
+
35
35
  try:
36
- # 1. Run database migrations
37
- await self._run_migrations()
38
-
39
- # 2. Populate model registry
36
+ # 1. Populate model registry
40
37
  await self._populate_models()
41
-
42
- # 3. Generate embeddings
38
+
39
+ # 2. Generate embeddings
43
40
  await self._generate_embeddings()
44
-
45
- # 4. Validate system
41
+
42
+ # 3. Validate system
46
43
  await self._validate_system()
47
-
48
- logger.info("✅ System initialization completed successfully!")
49
-
44
+
45
+ print("✅ System initialization completed successfully!")
46
+
50
47
  except Exception as e:
51
48
  logger.error(f"❌ System initialization failed: {e}")
52
49
  raise
53
50
 
54
- async def _run_migrations(self):
55
- """Run database migrations"""
56
- logger.info("📋 Running database migrations...")
57
-
58
- try:
59
- success = run_environment_migrations()
60
- if success:
61
- logger.info("✅ Database migrations completed")
62
- else:
63
- raise Exception("Database migrations failed")
64
- except Exception as e:
65
- logger.error(f"❌ Migration error: {e}")
66
- raise
67
-
68
51
  async def _populate_models(self):
69
52
  """Populate model registry with all configured models"""
70
- logger.info("📚 Populating model registry...")
53
+ print("📚 Populating model registry...")
71
54
 
72
55
  try:
73
56
  registry = ModelRegistry()
57
+ self._model_registry = registry # Track for cleanup
58
+
59
+ # Check if models are already populated to avoid unnecessary database operations
60
+ try:
61
+ stats = registry.get_stats()
62
+ if stats and stats.get('total_models', 0) > 0:
63
+ print(f"✅ Model registry already populated: {stats['total_models']} models")
64
+ return
65
+ except Exception as e:
66
+ print(f"⚠️ Could not check existing models, proceeding with population: {e}")
74
67
 
75
68
  # Get all configured models
76
69
  all_models = self.config_manager.model_definitions
77
70
 
78
71
  if not all_models:
79
- logger.warning("⚠️ No models configured in providers")
72
+ print("⚠️ No models configured in providers")
80
73
  return
81
74
 
82
75
  registered_count = 0
83
76
 
84
77
  for model_id, model_data in all_models.items():
85
78
  try:
86
- # Check if already registered
87
- existing = registry.get_model_info(model_id)
88
- if existing:
89
- logger.debug(f"Model {model_id} already registered, skipping")
90
- continue
79
+ # Skip individual model check to avoid multiple database queries
80
+ # We already checked if any models exist above
91
81
 
92
82
  # Map model type
93
83
  model_type_str = model_data.get('type', 'llm')
@@ -110,7 +100,6 @@ class StartupInitializer:
110
100
 
111
101
  if success:
112
102
  registered_count += 1
113
- logger.debug(f"Registered {model_id} ({provider})")
114
103
  else:
115
104
  logger.warning(f"Failed to register {model_id}")
116
105
 
@@ -118,7 +107,7 @@ class StartupInitializer:
118
107
  logger.error(f"Error registering {model_id}: {e}")
119
108
  continue
120
109
 
121
- logger.info(f"✅ Model registry populated: {registered_count}/{len(all_models)} models")
110
+ print(f"✅ Model registry populated: {registered_count}/{len(all_models)} models")
122
111
 
123
112
  except Exception as e:
124
113
  logger.error(f"❌ Model population error: {e}")
@@ -126,16 +115,17 @@ class StartupInitializer:
126
115
 
127
116
  async def _generate_embeddings(self):
128
117
  """Generate embeddings for all registered models using OpenAI embedding service"""
129
- logger.info("🧠 Generating model embeddings with OpenAI embedding service...")
118
+ print("🧠 Generating model embeddings...")
130
119
 
131
120
  try:
132
121
  # Initialize embedding service
133
122
  from ...inference.ai_factory import AIFactory
134
123
  factory = AIFactory.get_instance()
135
124
  embedding_service = factory.get_embed("text-embedding-3-small", "openai")
125
+ self._embedding_service = embedding_service # Track for cleanup
136
126
 
137
127
  if not embedding_service:
138
- logger.warning("⚠️ Could not initialize embedding service, skipping embedding generation")
128
+ print("⚠️ Could not initialize embedding service, skipping embedding generation")
139
129
  return
140
130
 
141
131
  # Get all registered models
@@ -143,7 +133,7 @@ class StartupInitializer:
143
133
  models = registry.list_models()
144
134
 
145
135
  if not models:
146
- logger.warning("⚠️ No models found in registry")
136
+ print("⚠️ No models found in registry")
147
137
  return
148
138
 
149
139
  # Check existing embeddings using Supabase client
@@ -151,15 +141,12 @@ class StartupInitializer:
151
141
  existing_result = supabase_client.table("model_embeddings").select("model_id").execute()
152
142
  existing_embeddings = {row['model_id'] for row in existing_result.data}
153
143
 
154
- logger.info(f"Found {len(existing_embeddings)} existing embeddings")
155
-
156
144
  processed = 0
157
145
 
158
146
  for model_id, model_data in models.items():
159
147
  try:
160
148
  # Skip if embedding already exists
161
149
  if model_id in existing_embeddings:
162
- logger.debug(f"Embedding already exists for {model_id}, skipping")
163
150
  continue
164
151
 
165
152
  provider = model_data.get('provider', 'unknown')
@@ -178,7 +165,6 @@ class StartupInitializer:
178
165
  search_text += f"Specialized for: {', '.join(specialized_tasks)}"
179
166
 
180
167
  # Generate embedding using OpenAI service
181
- logger.debug(f"Generating embedding for {model_id}...")
182
168
  embedding = await embedding_service.create_text_embedding(search_text)
183
169
 
184
170
  # Store embedding in database
@@ -193,7 +179,6 @@ class StartupInitializer:
193
179
 
194
180
  if result.data:
195
181
  processed += 1
196
- logger.debug(f"Stored embedding for {model_id}")
197
182
  else:
198
183
  logger.warning(f"Failed to store embedding for {model_id}")
199
184
 
@@ -201,7 +186,7 @@ class StartupInitializer:
201
186
  logger.error(f"Error creating embedding for {model_id}: {e}")
202
187
  continue
203
188
 
204
- logger.info(f"✅ Generated {processed}/{len(models)} new embeddings")
189
+ print(f"✅ Generated {processed}/{len(models)} new embeddings")
205
190
 
206
191
  # Close embedding service
207
192
  await embedding_service.close()
@@ -212,16 +197,16 @@ class StartupInitializer:
212
197
 
213
198
  async def _validate_system(self):
214
199
  """Validate system is working correctly"""
215
- logger.info("🔍 Validating system...")
200
+ print("🔍 Validating system...")
216
201
 
217
202
  try:
218
203
  registry = ModelRegistry()
219
204
  stats = registry.get_stats()
220
205
 
221
- logger.info(f"📊 System validation results:")
222
- logger.info(f" Models: {stats['total_models']}")
223
- logger.info(f" By type: {stats['models_by_type']}")
224
- logger.info(f" By capability: {stats['models_by_capability']}")
206
+ print(f"📊 System validation results:")
207
+ print(f" Models: {stats['total_models']}")
208
+ print(f" By type: {stats['models_by_type']}")
209
+ print(f" By capability: {stats['models_by_capability']}")
225
210
 
226
211
  if stats['total_models'] == 0:
227
212
  raise Exception("No models found in registry")
@@ -233,12 +218,12 @@ class StartupInitializer:
233
218
 
234
219
  # Test basic functionality
235
220
  available_models = await selector.get_available_models()
236
- logger.info(f" Available models for selection: {len(available_models)}")
221
+ print(f" Available models for selection: {len(available_models)}")
237
222
 
238
223
  except Exception as e:
239
224
  logger.warning(f"⚠️ Intelligent selector initialization failed: {e}")
240
225
 
241
- logger.info("✅ System validation completed")
226
+ print("✅ System validation completed")
242
227
 
243
228
  except Exception as e:
244
229
  logger.error(f"❌ System validation error: {e}")
@@ -294,6 +279,35 @@ class StartupInitializer:
294
279
 
295
280
  return result
296
281
 
282
async def cleanup(self):
    """Release resources that were tracked during startup.

    Closes the embedding service stored in ``self._embedding_service`` (when
    one was tracked) and then forgets it, so calling ``cleanup()`` again does
    not close the same service a second time. Failures are logged and never
    propagated to the caller.
    """
    logger.info("🧹 Starting startup initializer cleanup...")

    try:
        # getattr keeps this safe even if __init__ did not run to completion.
        embedding_service = getattr(self, '_embedding_service', None)
        # Forget the service before closing it: whatever happens below, a
        # repeated cleanup() must not try to close the same object again.
        self._embedding_service = None

        if embedding_service:
            try:
                await embedding_service.close()
                logger.info("✅ Embedding service closed")
            except Exception as e:
                logger.error(f"❌ Error closing embedding service: {e}")

        # The tracked ModelRegistry (self._model_registry) is intentionally
        # left untouched: no explicit teardown is performed for it here.

        logger.info("✅ Startup initializer cleanup completed")

    except Exception as e:
        logger.error(f"❌ Error during startup cleanup: {e}")
297
311
 
298
312
 
299
313
  # Global initializer instance
@@ -0,0 +1,311 @@
1
+ """
2
+ GPU detection and resource management utilities
3
+
4
+ Provides functions for detecting and managing local GPU resources.
5
+ """
6
+
7
+ import os
8
+ import logging
9
+ import subprocess
10
+ from typing import Dict, List, Optional, Any, Tuple
11
+ from dataclasses import dataclass
12
+ import platform
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@dataclass
class GPUInfo:
    """Snapshot of a single GPU's identity and current resource usage.

    Attributes:
        gpu_id: Device index.
        name: Device name.
        memory_total: Total memory, in MB.
        memory_free: Free memory, in MB.
        memory_used: Used memory, in MB.
        utilization: GPU utilization, in percent.
        temperature: Temperature in Celsius, or None when not reported.
        power_draw: Power draw in Watts, or None when not reported.
        driver_version: Driver version string, or None when not reported.
        cuda_version: CUDA version string, or None when not reported.
    """

    # Required fields (always populated by the detection code)
    gpu_id: int
    name: str
    memory_total: int
    memory_free: int
    memory_used: int
    utilization: float

    # Optional fields (default to None when the source cannot provide them)
    temperature: Optional[int] = None
    power_draw: Optional[float] = None
    driver_version: Optional[str] = None
    cuda_version: Optional[str] = None
30
+
31
+
32
class GPUManager:
    """Local GPU resource manager.

    Detects GPUs once at construction time (preferring ``nvidia-smi`` and
    falling back to PyTorch), caches the result in ``self.gpus``, and offers
    helpers for choosing a device and estimating whether a model fits on it.
    Call ``refresh()`` to re-run detection.
    """

    # Approximate bytes needed per model parameter for each precision label.
    # Unknown labels fall back to 2 bytes (half precision).
    _BYTES_PER_PARAM = {
        "float32": 4,
        "fp32": 4,
        "float16": 2,
        "fp16": 2,
        "bfloat16": 2,
        "bf16": 2,
        "int8": 1,
        "int4": 0.5,
    }

    # Substring-based parameter-count guesses, checked in this order when the
    # model id carries no cleanly delimited size token such as "8b" or "125m".
    _SIZE_HINTS = (
        ("7b", 7_000_000_000),
        ("13b", 13_000_000_000),
        ("70b", 70_000_000_000),
        ("large", 1_000_000_000),
        ("medium", 350_000_000),
        ("small", 125_000_000),
    )
    _DEFAULT_PARAMS = 500_000_000

    def __init__(self):
        self.gpus: List["GPUInfo"] = []
        self.cuda_available = False
        self.nvidia_smi_available = False
        self._initialize()

    def _initialize(self):
        """Probe for CUDA / nvidia-smi and populate ``self.gpus``."""
        self.cuda_available = self._check_cuda_availability()
        self.nvidia_smi_available = self._check_nvidia_smi()
        self.gpus = self._detect_gpus()

        if not self.gpus:
            logger.warning("No CUDA-capable GPUs detected")

    def _detect_gpus(self) -> List["GPUInfo"]:
        """Run detection with the best available backend.

        nvidia-smi is preferred because it reports utilization, temperature
        and power. If it is missing, or yields no usable rows, PyTorch is
        used when CUDA is available.
        """
        detected: List["GPUInfo"] = []
        if self.nvidia_smi_available:
            detected = self._detect_nvidia_gpus()
        if not detected and self.cuda_available:
            detected = self._detect_cuda_gpus_fallback()
        return detected

    def _check_cuda_availability(self) -> bool:
        """Check if CUDA is available through PyTorch."""
        try:
            import torch
            available = torch.cuda.is_available()
            if available:
                logger.info(f"CUDA detected: {torch.cuda.device_count()} devices")
                logger.info(f"CUDA version: {torch.version.cuda}")
            return available
        except ImportError:
            logger.warning("PyTorch not available for CUDA detection")
            return False
        except Exception as e:
            logger.warning(f"CUDA detection failed: {e}")
            return False

    def _check_nvidia_smi(self) -> bool:
        """Return True when the ``nvidia-smi`` executable runs successfully."""
        try:
            result = subprocess.run(['nvidia-smi', '--version'],
                                    capture_output=True, text=True, timeout=5)
            return result.returncode == 0
        except (subprocess.SubprocessError, OSError):
            # SubprocessError covers timeouts; OSError covers a missing or
            # non-executable binary.
            return False

    @staticmethod
    def _parse_optional(field, cast):
        """Convert one nvidia-smi CSV field, or return None if unavailable.

        nvidia-smi prints bracketed placeholders such as ``[N/A]`` or
        ``[Not Supported]`` for values the device cannot report; those, empty
        fields, and anything ``cast`` rejects all map to None.
        """
        text = field.strip()
        if not text or text.startswith('['):
            return None
        try:
            return cast(text)
        except ValueError:
            return None

    @staticmethod
    def _parse_cuda_release(nvcc_output):
        """Extract the CUDA release number from ``nvcc --version`` output.

        The relevant line looks like
        ``Cuda compilation tools, release 12.1, V12.1.105``; the token right
        after the word "release" ("12.1") is returned. Returns None when no
        such line exists.
        """
        for line in nvcc_output.splitlines():
            tokens = line.replace(',', ' ').split()
            lowered = [token.lower() for token in tokens]
            if 'release' in lowered:
                position = lowered.index('release')
                if position + 1 < len(tokens):
                    return tokens[position + 1]
        return None

    def _parse_nvidia_smi_line(self, line: str) -> Optional["GPUInfo"]:
        """Build a GPUInfo from one CSV row of the nvidia-smi query.

        Returns None for rows with fewer than seven fields. Raises ValueError
        when a mandatory field (index or memory figures) is not numeric.
        """
        parts = [p.strip() for p in line.split(',')]
        if len(parts) < 7:
            return None

        utilization = self._parse_optional(parts[5], float)
        return GPUInfo(
            gpu_id=int(parts[0]),
            name=parts[1],
            memory_total=int(parts[2]),
            memory_free=int(parts[3]),
            memory_used=int(parts[4]),
            # Same convention as the PyTorch fallback: unknown utilization is 0.0
            utilization=utilization if utilization is not None else 0.0,
            temperature=self._parse_optional(parts[6], int),
            power_draw=self._parse_optional(parts[7], float) if len(parts) > 7 else None,
            driver_version=parts[8] if len(parts) > 8 else None
        )

    def _query_nvcc_cuda_version(self) -> Optional[str]:
        """Return the CUDA toolkit release reported by ``nvcc``, if any."""
        try:
            result = subprocess.run(['nvcc', '--version'],
                                    capture_output=True, text=True, timeout=5)
        except (subprocess.SubprocessError, OSError):
            return None
        if result.returncode != 0:
            return None
        return self._parse_cuda_release(result.stdout)

    def _detect_nvidia_gpus(self) -> List["GPUInfo"]:
        """Detect GPUs using nvidia-smi.

        A row that cannot be parsed is skipped with a warning instead of
        aborting detection of the remaining devices.
        """
        cmd = [
            'nvidia-smi',
            '--query-gpu=index,name,memory.total,memory.free,memory.used,utilization.gpu,temperature.gpu,power.draw,driver_version',
            '--format=csv,noheader,nounits'
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        except (subprocess.SubprocessError, OSError) as e:
            logger.error(f"Failed to detect GPUs with nvidia-smi: {e}")
            return []

        if result.returncode != 0:
            logger.warning(f"nvidia-smi exited with code {result.returncode}")
            return []

        gpus = []
        for line in result.stdout.splitlines():
            if not line.strip():
                continue
            try:
                gpu_info = self._parse_nvidia_smi_line(line)
            except ValueError as e:
                logger.warning(f"Skipping unparseable nvidia-smi row {line!r}: {e}")
                continue
            if gpu_info is not None:
                gpus.append(gpu_info)

        # nvcc is only worth invoking when there is something to annotate.
        if gpus:
            cuda_version = self._query_nvcc_cuda_version()
            if cuda_version:
                for gpu in gpus:
                    gpu.cuda_version = cuda_version

        return gpus

    def _detect_cuda_gpus_fallback(self) -> List["GPUInfo"]:
        """Fallback GPU detection using PyTorch.

        Does not change the process-wide current CUDA device. Free memory is
        taken from ``torch.cuda.mem_get_info`` when that call is available,
        otherwise approximated from this process's own allocations.
        """
        gpus = []

        try:
            import torch
            if not torch.cuda.is_available():
                return gpus

            mib = 1024 ** 2
            for i in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(i)
                total_bytes = props.total_memory

                try:
                    free_bytes, total_bytes = torch.cuda.mem_get_info(i)
                except Exception:
                    # Older PyTorch without mem_get_info (or a failing query):
                    # only memory allocated by this process is visible.
                    free_bytes = total_bytes - torch.cuda.memory_allocated(i)

                gpus.append(GPUInfo(
                    gpu_id=i,
                    name=props.name,
                    memory_total=total_bytes // mib,
                    memory_free=free_bytes // mib,
                    memory_used=(total_bytes - free_bytes) // mib,
                    utilization=0.0,  # Cannot get utilization without nvidia-smi
                    cuda_version=torch.version.cuda
                ))

        except Exception as e:
            logger.error(f"Failed to detect GPUs with PyTorch: {e}")

        return gpus

    def get_gpu_info(self, gpu_id: Optional[int] = None) -> Optional["GPUInfo"]:
        """Get information for a specific GPU, or the GPU with most free memory.

        Returns None when no GPUs are known or ``gpu_id`` matches none of them.
        """
        if not self.gpus:
            return None

        if gpu_id is not None:
            return next((gpu for gpu in self.gpus if gpu.gpu_id == gpu_id), None)

        return max(self.gpus, key=lambda x: x.memory_free)

    def get_best_gpu(self, min_memory_mb: int = 1024) -> Optional["GPUInfo"]:
        """Get the best available GPU for model deployment.

        Only GPUs with at least ``min_memory_mb`` MB free are considered; among
        those, the one with the most free memory wins and lower utilization
        breaks ties. Returns None when no GPU qualifies.
        """
        available_gpus = [gpu for gpu in self.gpus if gpu.memory_free >= min_memory_mb]

        if not available_gpus:
            return None

        return min(available_gpus, key=lambda x: (-x.memory_free, x.utilization))

    def estimate_model_memory(self, model_id: str, precision: str = "float16") -> int:
        """Estimate memory requirements for a model in MB.

        The parameter count is guessed from ``model_id``: a delimited size
        token such as ``8b``, ``0.5b`` or ``125m`` is used when present;
        otherwise known substrings (7b/13b/70b/large/medium/small) are tried,
        and finally a 500M-parameter default. The result is
        ``parameters * bytes_per_parameter`` plus 1 GB of overhead.
        """
        import re  # local import: this module does not import re at file level

        bytes_per_param = self._BYTES_PER_PARAM.get(precision, 2)
        name = model_id.lower()

        # A number directly followed by "b"/"m" that is not glued to other
        # letters or digits, e.g. "-8b-" but not "8x7b" or "v2b".
        explicit = re.search(r'(?<![a-z0-9.])(\d+(?:\.\d+)?)([bm])(?![a-z0-9])', name)
        if explicit:
            unit = 1_000_000_000 if explicit.group(2) == 'b' else 1_000_000
            params = int(float(explicit.group(1)) * unit)
        else:
            params = next(
                (count for hint, count in self._SIZE_HINTS if hint in name),
                self._DEFAULT_PARAMS,
            )

        # Memory = parameters * bytes_per_param + overhead
        return int((params * bytes_per_param + 1024**3) / (1024**2))  # +1GB overhead

    def check_gpu_compatibility(self, model_id: str, precision: str = "float16") -> Tuple[bool, List[str]]:
        """Check if local GPUs can handle the model.

        Returns ``(compatible, messages)``. ``messages`` explains a negative
        result, or carries advisory warnings alongside a positive one.
        """
        if not self.gpus:
            return False, ["No CUDA-capable GPUs detected"]

        warnings = []
        estimated_memory = self.estimate_model_memory(model_id, precision)

        if not self.get_best_gpu(estimated_memory):
            warnings.append(f"Insufficient GPU memory. Required: {estimated_memory}MB, Available: {max(gpu.memory_free for gpu in self.gpus)}MB")
            return False, warnings

        # Quantized formats may need hardware support that is not checked here
        if precision in ["int8", "int4"]:
            warnings.append("Quantized precision may require specific GPU compute capability")

        return True, warnings

    def refresh(self):
        """Refresh GPU information by re-running detection."""
        self.gpus = self._detect_gpus()

    def get_system_info(self) -> Dict[str, Any]:
        """Get comprehensive system information.

        Returns a plain dict with platform details, detection flags and one
        entry per cached GPU; PyTorch version fields are added when PyTorch
        can be imported.
        """
        info = {
            "platform": platform.system(),
            "architecture": platform.machine(),
            "cuda_available": self.cuda_available,
            "nvidia_smi_available": self.nvidia_smi_available,
            "gpu_count": len(self.gpus),
            "gpus": [
                {
                    "id": gpu.gpu_id,
                    "name": gpu.name,
                    "memory_total_mb": gpu.memory_total,
                    "memory_free_mb": gpu.memory_free,
                    "memory_used_mb": gpu.memory_used,
                    "utilization_percent": gpu.utilization,
                    "temperature_c": gpu.temperature,
                    "power_draw_w": gpu.power_draw,
                    "driver_version": gpu.driver_version,
                    "cuda_version": gpu.cuda_version
                }
                for gpu in self.gpus
            ]
        }

        # Add Python environment info
        try:
            import torch
            info["torch_version"] = torch.__version__
            info["torch_cuda_version"] = torch.version.cuda
        except ImportError:
            pass

        return info
281
+
282
+
283
+ # Global GPU manager instance
284
+ _gpu_manager = None
285
+
286
def get_gpu_manager() -> GPUManager:
    """Return the shared GPUManager, creating it on first use."""
    global _gpu_manager
    if _gpu_manager is not None:
        return _gpu_manager
    # First call: building the manager runs GPU detection once.
    _gpu_manager = GPUManager()
    return _gpu_manager
292
+
293
+
294
def detect_gpus() -> List[GPUInfo]:
    """Return the GPU list cached by the shared GPU manager."""
    manager = get_gpu_manager()
    return manager.gpus
297
+
298
+
299
def get_best_gpu(min_memory_mb: int = 1024) -> Optional[GPUInfo]:
    """Return the shared manager's best GPU with at least ``min_memory_mb`` MB free."""
    manager = get_gpu_manager()
    return manager.get_best_gpu(min_memory_mb)
302
+
303
+
304
def check_cuda_availability() -> bool:
    """Return the CUDA availability flag recorded by the shared GPU manager."""
    manager = get_gpu_manager()
    return manager.cuda_available
307
+
308
+
309
def estimate_model_memory(model_id: str, precision: str = "float16") -> int:
    """Return the shared manager's memory estimate (in MB) for ``model_id``."""
    manager = get_gpu_manager()
    return manager.estimate_model_memory(model_id, precision)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isa_model
3
- Version: 0.4.0
3
+ Version: 0.4.3
4
4
  Summary: Unified AI model serving framework
5
5
  Author: isA_Model Contributors
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -10,45 +10,87 @@ Classifier: Programming Language :: Python :: 3
10
10
  Requires-Python: >=3.8
11
11
  Description-Content-Type: text/markdown
12
12
  Requires-Dist: fastapi>=0.95.0
13
- Requires-Dist: numpy>=1.20.0
14
- Requires-Dist: httpx>=0.23.0
15
- Requires-Dist: pydantic>=2.0.0
16
13
  Requires-Dist: uvicorn>=0.22.0
14
+ Requires-Dist: pydantic>=2.0.0
15
+ Requires-Dist: httpx>=0.23.0
17
16
  Requires-Dist: requests>=2.28.0
18
17
  Requires-Dist: aiohttp>=3.8.0
19
- Requires-Dist: transformers>=4.30.0
20
- Requires-Dist: langchain-core>=0.1.0
21
- Requires-Dist: huggingface-hub>=0.16.0
22
- Requires-Dist: kubernetes>=25.3.0
23
- Requires-Dist: mlflow>=2.4.0
24
- Requires-Dist: torch>=2.0.0
25
- Requires-Dist: openai>=1.10.0
26
- Requires-Dist: replicate>=0.23.0
27
18
  Requires-Dist: python-dotenv>=1.0.0
28
- Requires-Dist: ollama>=0.3.0
29
- Requires-Dist: runpod>=1.0.0
30
- Requires-Dist: boto3>=1.26.0
31
- Requires-Dist: google-cloud-storage>=2.7.0
32
- Requires-Dist: datasets>=2.10.0
33
- Requires-Dist: accelerate>=0.20.0
34
- Requires-Dist: bitsandbytes>=0.39.0
35
- Requires-Dist: peft>=0.4.0
36
- Requires-Dist: trl>=0.4.0
19
+ Requires-Dist: numpy>=1.20.0
37
20
  Requires-Dist: supabase>=2.0.0
38
- Requires-Dist: pgvector>=0.2.0
39
21
  Requires-Dist: psycopg2-binary>=2.9.0
40
22
  Requires-Dist: asyncpg>=0.28.0
41
23
  Requires-Dist: slowapi>=0.1.8
42
- Requires-Dist: redis>=4.5.0
43
24
  Requires-Dist: circuitbreaker>=1.3.2
44
- Requires-Dist: prometheus-fastapi-instrumentator>=6.1.0
45
25
  Requires-Dist: structlog>=23.1.0
26
+ Requires-Dist: psutil>=5.9.0
27
+ Requires-Dist: redis>=4.5.0
28
+ Requires-Dist: tenacity>=8.2.0
29
+ Provides-Extra: cloud
30
+ Requires-Dist: openai>=1.10.0; extra == "cloud"
31
+ Requires-Dist: replicate>=0.23.0; extra == "cloud"
32
+ Requires-Dist: cerebras-cloud-sdk>=1.0.0; extra == "cloud"
33
+ Requires-Dist: modal>=0.63.0; extra == "cloud"
34
+ Requires-Dist: grpclib>=0.4.7; extra == "cloud"
35
+ Requires-Dist: python-logging-loki>=0.3.1; extra == "cloud"
36
+ Requires-Dist: huggingface-hub>=0.16.0; extra == "cloud"
37
+ Requires-Dist: docker>=6.0.0; extra == "cloud"
38
+ Requires-Dist: influxdb-client>=1.36.0; extra == "cloud"
39
+ Requires-Dist: tiktoken>=0.5.0; extra == "cloud"
40
+ Provides-Extra: local
41
+ Requires-Dist: torch>=2.0.0; extra == "local"
42
+ Requires-Dist: transformers>=4.30.0; extra == "local"
43
+ Requires-Dist: accelerate>=0.20.0; extra == "local"
44
+ Requires-Dist: huggingface-hub>=0.16.0; extra == "local"
45
+ Requires-Dist: safetensors>=0.4.1; extra == "local"
46
+ Requires-Dist: sentencepiece>=0.1.99; extra == "local"
47
+ Provides-Extra: training
48
+ Requires-Dist: datasets>=2.10.0; extra == "training"
49
+ Requires-Dist: peft>=0.4.0; extra == "training"
50
+ Requires-Dist: trl>=0.4.0; extra == "training"
51
+ Requires-Dist: bitsandbytes>=0.39.0; extra == "training"
52
+ Provides-Extra: audio
53
+ Requires-Dist: librosa>=0.10.1; extra == "audio"
54
+ Requires-Dist: soundfile>=0.12.1; extra == "audio"
55
+ Requires-Dist: numba>=0.57.0; extra == "audio"
56
+ Provides-Extra: vision
57
+ Requires-Dist: Pillow>=10.0.1; extra == "vision"
58
+ Requires-Dist: torchvision>=0.15.2; extra == "vision"
59
+ Provides-Extra: langchain
60
+ Requires-Dist: langchain-core>=0.1.0; extra == "langchain"
61
+ Requires-Dist: langchain-openai>=0.0.2; extra == "langchain"
62
+ Provides-Extra: storage
63
+ Requires-Dist: boto3>=1.26.0; extra == "storage"
64
+ Requires-Dist: google-cloud-storage>=2.7.0; extra == "storage"
65
+ Provides-Extra: monitoring
66
+ Requires-Dist: mlflow>=2.4.0; extra == "monitoring"
67
+ Requires-Dist: redis>=4.5.0; extra == "monitoring"
68
+ Requires-Dist: prometheus-fastapi-instrumentator>=6.1.0; extra == "monitoring"
69
+ Requires-Dist: influxdb-client>=1.36.0; extra == "monitoring"
70
+ Requires-Dist: pgvector>=0.2.0; extra == "monitoring"
71
+ Requires-Dist: python-logging-loki>=0.3.1; extra == "monitoring"
72
+ Provides-Extra: k8s
73
+ Requires-Dist: kubernetes>=25.3.0; extra == "k8s"
74
+ Provides-Extra: gpu-cloud
75
+ Requires-Dist: runpod>=1.0.0; extra == "gpu-cloud"
76
+ Requires-Dist: ollama>=0.3.0; extra == "gpu-cloud"
46
77
  Provides-Extra: dev
47
78
  Requires-Dist: pytest>=7.0.0; extra == "dev"
48
79
  Requires-Dist: black>=22.0.0; extra == "dev"
49
80
  Requires-Dist: flake8>=4.0.0; extra == "dev"
50
81
  Requires-Dist: mypy>=0.991; extra == "dev"
51
82
  Requires-Dist: twine>=4.0.0; extra == "dev"
83
+ Provides-Extra: api-only
84
+ Requires-Dist: isa-model[cloud,langchain]; extra == "api-only"
85
+ Provides-Extra: full-local
86
+ Requires-Dist: isa-model[audio,langchain,local,training,vision]; extra == "full-local"
87
+ Provides-Extra: production
88
+ Requires-Dist: isa-model[cloud,k8s,monitoring,storage]; extra == "production"
89
+ Provides-Extra: staging
90
+ Requires-Dist: isa-model[cloud,langchain,monitoring,storage]; extra == "staging"
91
+ Requires-Dist: python-consul>=1.1.0; extra == "staging"
92
+ Provides-Extra: all
93
+ Requires-Dist: isa-model[audio,cloud,gpu-cloud,k8s,langchain,local,monitoring,storage,training,vision]; extra == "all"
52
94
 
53
95
  # isa_model_sdk - Unified AI Model Serving Framework
54
96