isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
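The deployment tree was reorganized in this release: the Modal services under deployment/cloud/modal moved to deployment/modal/services, and the auto HF deployer moved to deployment/modal/deployer.py. A minimal migration sketch, using only the module paths shown in the list above (any symbols inside those modules are unverified):

# Migration sketch based on the renames listed above; module paths come from
# the file list, but exported symbol names are not verified here.
import importlib

# 0.4.0 location (removed): isa_model.deployment.services.auto_hf_modal_deployer
# 0.4.3 location:           isa_model.deployment.modal.deployer
deployer_module = importlib.import_module("isa_model.deployment.modal.deployer")
print([name for name in dir(deployer_module) if not name.startswith("_")])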
isa_model/serving/api/startup.py
CHANGED
@@ -12,11 +12,9 @@ import logging
 import asyncio
 from typing import Dict, Any
 import json
-import psycopg2
 import os
 
 from ...core.config.config_manager import ConfigManager
-from ...core.database.migrations import run_environment_migrations
 from ...core.models.model_repo import ModelRegistry
 from ...core.types import ModelType, ModelCapability
 
@@ -27,67 +25,59 @@ class StartupInitializer:
 
     def __init__(self):
         self.config_manager = ConfigManager()
+        self._embedding_service = None
+        self._model_registry = None
 
     async def initialize_system(self):
         """Run complete system initialization"""
-
-
+        print("🚀 Starting ISA Model system initialization...")
+
         try:
-            # 1. Run database migrations
-            await self._run_migrations()
-
-            # 2. Populate model registry
+            # 1. Populate model registry
            await self._populate_models()
-
-            # 3. Generate embeddings
+
+            # 2. Generate embeddings
            await self._generate_embeddings()
-
-            # 4. Validate system
+
+            # 3. Validate system
            await self._validate_system()
-
-
-
+
+            print("✅ System initialization completed successfully!")
+
         except Exception as e:
            logger.error(f"❌ System initialization failed: {e}")
            raise
 
-    async def _run_migrations(self):
-        """Run database migrations"""
-        logger.info("📋 Running database migrations...")
-
-        try:
-            success = run_environment_migrations()
-            if success:
-                logger.info("✅ Database migrations completed")
-            else:
-                raise Exception("Database migrations failed")
-        except Exception as e:
-            logger.error(f"❌ Migration error: {e}")
-            raise
-
     async def _populate_models(self):
         """Populate model registry with all configured models"""
-
+        print("📚 Populating model registry...")
 
         try:
             registry = ModelRegistry()
+            self._model_registry = registry  # Track for cleanup
+
+            # Check if models are already populated to avoid unnecessary database operations
+            try:
+                stats = registry.get_stats()
+                if stats and stats.get('total_models', 0) > 0:
+                    print(f"✅ Model registry already populated: {stats['total_models']} models")
+                    return
+            except Exception as e:
+                print(f"⚠️ Could not check existing models, proceeding with population: {e}")
 
             # Get all configured models
             all_models = self.config_manager.model_definitions
 
             if not all_models:
-
+                print("⚠️ No models configured in providers")
                 return
 
             registered_count = 0
 
             for model_id, model_data in all_models.items():
                 try:
-                    # Check if model already exists
-
-                    if existing:
-                        logger.debug(f"Model {model_id} already registered, skipping")
-                        continue
+                    # Skip individual model check to avoid multiple database queries
+                    # We already checked if any models exist above
 
                     # Map model type
                     model_type_str = model_data.get('type', 'llm')
@@ -110,7 +100,6 @@ class StartupInitializer:
 
                     if success:
                         registered_count += 1
-                        logger.debug(f"Registered {model_id} ({provider})")
                     else:
                         logger.warning(f"Failed to register {model_id}")
 
@@ -118,7 +107,7 @@ class StartupInitializer:
                     logger.error(f"Error registering {model_id}: {e}")
                     continue
 
-
+            print(f"✅ Model registry populated: {registered_count}/{len(all_models)} models")
 
         except Exception as e:
             logger.error(f"❌ Model population error: {e}")
@@ -126,16 +115,17 @@ class StartupInitializer:
 
     async def _generate_embeddings(self):
         """Generate embeddings for all registered models using OpenAI embedding service"""
-
+        print("🧠 Generating model embeddings...")
 
         try:
             # Initialize embedding service
             from ...inference.ai_factory import AIFactory
             factory = AIFactory.get_instance()
             embedding_service = factory.get_embed("text-embedding-3-small", "openai")
+            self._embedding_service = embedding_service  # Track for cleanup
 
             if not embedding_service:
-
+                print("⚠️ Could not initialize embedding service, skipping embedding generation")
                 return
 
             # Get all registered models
@@ -143,7 +133,7 @@ class StartupInitializer:
             models = registry.list_models()
 
             if not models:
-
+                print("⚠️ No models found in registry")
                 return
 
             # Check existing embeddings using Supabase client
@@ -151,15 +141,12 @@ class StartupInitializer:
             existing_result = supabase_client.table("model_embeddings").select("model_id").execute()
             existing_embeddings = {row['model_id'] for row in existing_result.data}
 
-            logger.info(f"Found {len(existing_embeddings)} existing embeddings")
-
             processed = 0
 
             for model_id, model_data in models.items():
                 try:
                     # Skip if embedding already exists
                     if model_id in existing_embeddings:
-                        logger.debug(f"Embedding already exists for {model_id}, skipping")
                         continue
 
                     provider = model_data.get('provider', 'unknown')
@@ -178,7 +165,6 @@ class StartupInitializer:
                         search_text += f"Specialized for: {', '.join(specialized_tasks)}"
 
                     # Generate embedding using OpenAI service
-                    logger.debug(f"Generating embedding for {model_id}...")
                     embedding = await embedding_service.create_text_embedding(search_text)
 
                     # Store embedding in database
@@ -193,7 +179,6 @@ class StartupInitializer:
 
                     if result.data:
                         processed += 1
-                        logger.debug(f"Stored embedding for {model_id}")
                     else:
                         logger.warning(f"Failed to store embedding for {model_id}")
 
@@ -201,7 +186,7 @@ class StartupInitializer:
                     logger.error(f"Error creating embedding for {model_id}: {e}")
                     continue
 
-
+            print(f"✅ Generated {processed}/{len(models)} new embeddings")
 
             # Close embedding service
             await embedding_service.close()
@@ -212,16 +197,16 @@ class StartupInitializer:
 
     async def _validate_system(self):
         """Validate system is working correctly"""
-
+        print("🔍 Validating system...")
 
         try:
             registry = ModelRegistry()
             stats = registry.get_stats()
 
-
-
-
-
+            print(f"📊 System validation results:")
+            print(f"   Models: {stats['total_models']}")
+            print(f"   By type: {stats['models_by_type']}")
+            print(f"   By capability: {stats['models_by_capability']}")
 
             if stats['total_models'] == 0:
                 raise Exception("No models found in registry")
@@ -233,12 +218,12 @@ class StartupInitializer:
 
                 # Test basic functionality
                 available_models = await selector.get_available_models()
-
+                print(f"   Available models for selection: {len(available_models)}")
 
             except Exception as e:
                 logger.warning(f"⚠️ Intelligent selector initialization failed: {e}")
 
-
+            print("✅ System validation completed")
 
         except Exception as e:
             logger.error(f"❌ System validation error: {e}")
@@ -294,6 +279,35 @@ class StartupInitializer:
 
         return result
 
+    async def cleanup(self):
+        """Clean up startup resources"""
+        logger.info("🧹 Starting startup initializer cleanup...")
+
+        try:
+            # Clean up any persistent connections or resources
+            # Most cleanup is handled by individual services, but we can do some general cleanup here
+
+            # If we have any cached embedding services, clean them up
+            if hasattr(self, '_embedding_service') and self._embedding_service:
+                try:
+                    await self._embedding_service.close()
+                    logger.info("✅ Embedding service closed")
+                except Exception as e:
+                    logger.error(f"❌ Error closing embedding service: {e}")
+
+            # Clean up model registry connections if needed
+            if hasattr(self, '_model_registry'):
+                try:
+                    # ModelRegistry doesn't need explicit cleanup currently
+                    # but this is where we'd add it if needed
+                    pass
+                except Exception as e:
+                    logger.error(f"❌ Error cleaning up model registry: {e}")
+
+            logger.info("✅ Startup initializer cleanup completed")
+
+        except Exception as e:
+            logger.error(f"❌ Error during startup cleanup: {e}")
 
 
 # Global initializer instance
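The new cleanup() hook pairs with initialize_system() for server lifecycle management. A minimal wiring sketch, assuming a FastAPI lifespan context (the initializer name and wiring are illustrative, not taken from fastapi_server.py):

# Hypothetical wiring: initialize on startup, release tracked resources on
# shutdown via the new cleanup() method.
from contextlib import asynccontextmanager

from fastapi import FastAPI

from isa_model.serving.api.startup import StartupInitializer

initializer = StartupInitializer()

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Populates the model registry, generates embeddings, validates the system
    await initializer.initialize_system()
    yield
    # Closes the embedding service tracked in self._embedding_service
    await initializer.cleanup()

app = FastAPI(lifespan=lifespan)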
isa_model/utils/gpu_utils.py
ADDED
@@ -0,0 +1,311 @@
+"""
+GPU detection and resource management utilities
+
+Provides functions for detecting and managing local GPU resources.
+"""
+
+import os
+import logging
+import subprocess
+from typing import Dict, List, Optional, Any, Tuple
+from dataclasses import dataclass
+import platform
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class GPUInfo:
+    """GPU information structure"""
+    gpu_id: int
+    name: str
+    memory_total: int  # MB
+    memory_free: int  # MB
+    memory_used: int  # MB
+    utilization: float  # %
+    temperature: Optional[int] = None  # Celsius
+    power_draw: Optional[float] = None  # Watts
+    driver_version: Optional[str] = None
+    cuda_version: Optional[str] = None
+
+
+class GPUManager:
+    """Local GPU resource manager"""
+
+    def __init__(self):
+        self.gpus: List[GPUInfo] = []
+        self.cuda_available = False
+        self.nvidia_smi_available = False
+        self._initialize()
+
+    def _initialize(self):
+        """Initialize GPU detection"""
+        self.cuda_available = self._check_cuda_availability()
+        self.nvidia_smi_available = self._check_nvidia_smi()
+
+        if self.nvidia_smi_available:
+            self.gpus = self._detect_nvidia_gpus()
+        elif self.cuda_available:
+            self.gpus = self._detect_cuda_gpus_fallback()
+        else:
+            logger.warning("No CUDA-capable GPUs detected")
+
+    def _check_cuda_availability(self) -> bool:
+        """Check if CUDA is available through PyTorch"""
+        try:
+            import torch
+            available = torch.cuda.is_available()
+            if available:
+                logger.info(f"CUDA detected: {torch.cuda.device_count()} devices")
+                logger.info(f"CUDA version: {torch.version.cuda}")
+            return available
+        except ImportError:
+            logger.warning("PyTorch not available for CUDA detection")
+            return False
+        except Exception as e:
+            logger.warning(f"CUDA detection failed: {e}")
+            return False
+
+    def _check_nvidia_smi(self) -> bool:
+        """Check if nvidia-smi is available"""
+        try:
+            result = subprocess.run(['nvidia-smi', '--version'],
+                                  capture_output=True, text=True, timeout=5)
+            return result.returncode == 0
+        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError):
+            return False
+
+    def _detect_nvidia_gpus(self) -> List[GPUInfo]:
+        """Detect GPUs using nvidia-smi"""
+        gpus = []
+
+        try:
+            # Get GPU information using nvidia-smi
+            cmd = [
+                'nvidia-smi',
+                '--query-gpu=index,name,memory.total,memory.free,memory.used,utilization.gpu,temperature.gpu,power.draw,driver_version',
+                '--format=csv,noheader,nounits'
+            ]
+
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
+
+            if result.returncode == 0:
+                lines = result.stdout.strip().split('\n')
+                for line in lines:
+                    if line.strip():
+                        parts = [p.strip() for p in line.split(',')]
+                        if len(parts) >= 7:
+                            gpu_info = GPUInfo(
+                                gpu_id=int(parts[0]),
+                                name=parts[1],
+                                memory_total=int(parts[2]),
+                                memory_free=int(parts[3]),
+                                memory_used=int(parts[4]),
+                                utilization=float(parts[5]),
+                                temperature=int(parts[6]) if parts[6] != '[Not Supported]' else None,
+                                power_draw=float(parts[7]) if len(parts) > 7 and parts[7] != '[Not Supported]' else None,
+                                driver_version=parts[8] if len(parts) > 8 else None
+                            )
+                            gpus.append(gpu_info)
+
+            # Get CUDA version
+            try:
+                cuda_result = subprocess.run(['nvcc', '--version'],
+                                           capture_output=True, text=True, timeout=5)
+                if cuda_result.returncode == 0:
+                    for line in cuda_result.stdout.split('\n'):
+                        if 'release' in line.lower():
+                            cuda_version = line.split()[-1].rstrip(',')
+                            for gpu in gpus:
+                                gpu.cuda_version = cuda_version
+                            break
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                pass
+
+        except Exception as e:
+            logger.error(f"Failed to detect GPUs with nvidia-smi: {e}")
+
+        return gpus
+
+    def _detect_cuda_gpus_fallback(self) -> List[GPUInfo]:
+        """Fallback GPU detection using PyTorch"""
+        gpus = []
+
+        try:
+            import torch
+            if torch.cuda.is_available():
+                for i in range(torch.cuda.device_count()):
+                    props = torch.cuda.get_device_properties(i)
+
+                    # Get memory info
+                    torch.cuda.set_device(i)
+                    memory_total = torch.cuda.get_device_properties(i).total_memory // (1024**2)  # MB
+                    memory_free = (torch.cuda.get_device_properties(i).total_memory - torch.cuda.memory_allocated(i)) // (1024**2)
+                    memory_used = torch.cuda.memory_allocated(i) // (1024**2)
+
+                    gpu_info = GPUInfo(
+                        gpu_id=i,
+                        name=props.name,
+                        memory_total=memory_total,
+                        memory_free=memory_free,
+                        memory_used=memory_used,
+                        utilization=0.0,  # Cannot get utilization without nvidia-smi
+                        cuda_version=torch.version.cuda
+                    )
+                    gpus.append(gpu_info)
+
+        except Exception as e:
+            logger.error(f"Failed to detect GPUs with PyTorch: {e}")
+
+        return gpus
+
+    def get_gpu_info(self, gpu_id: Optional[int] = None) -> Optional[GPUInfo]:
+        """Get information for a specific GPU or best available GPU"""
+        if not self.gpus:
+            return None
+
+        if gpu_id is not None:
+            for gpu in self.gpus:
+                if gpu.gpu_id == gpu_id:
+                    return gpu
+            return None
+
+        # Return GPU with most free memory
+        return max(self.gpus, key=lambda x: x.memory_free)
+
+    def get_best_gpu(self, min_memory_mb: int = 1024) -> Optional[GPUInfo]:
+        """Get the best available GPU for model deployment"""
+        available_gpus = [gpu for gpu in self.gpus if gpu.memory_free >= min_memory_mb]
+
+        if not available_gpus:
+            return None
+
+        # Sort by free memory (descending) and utilization (ascending)
+        return sorted(available_gpus,
+                     key=lambda x: (-x.memory_free, x.utilization))[0]
+
+    def estimate_model_memory(self, model_id: str, precision: str = "float16") -> int:
+        """Estimate memory requirements for a model in MB"""
+        # Simple estimation based on model name and precision
+        memory_multipliers = {
+            "float32": 4,
+            "float16": 2,
+            "int8": 1,
+            "int4": 0.5
+        }
+
+        multiplier = memory_multipliers.get(precision, 2)
+
+        # Rough parameter estimates based on model names
+        if "7b" in model_id.lower():
+            params = 7_000_000_000
+        elif "13b" in model_id.lower():
+            params = 13_000_000_000
+        elif "70b" in model_id.lower():
+            params = 70_000_000_000
+        elif "large" in model_id.lower():
+            params = 1_000_000_000
+        elif "medium" in model_id.lower():
+            params = 350_000_000
+        elif "small" in model_id.lower():
+            params = 125_000_000
+        else:
+            params = 500_000_000  # Default estimate
+
+        # Memory = parameters * bytes_per_param + overhead
+        estimated_mb = int((params * multiplier + 1024**3) / (1024**2))  # +1GB overhead
+
+        return estimated_mb
+
+    def check_gpu_compatibility(self, model_id: str, precision: str = "float16") -> Tuple[bool, List[str]]:
+        """Check if local GPUs can handle the model"""
+        warnings = []
+
+        if not self.gpus:
+            return False, ["No CUDA-capable GPUs detected"]
+
+        estimated_memory = self.estimate_model_memory(model_id, precision)
+        best_gpu = self.get_best_gpu(estimated_memory)
+
+        if not best_gpu:
+            warnings.append(f"Insufficient GPU memory. Required: {estimated_memory}MB, Available: {max(gpu.memory_free for gpu in self.gpus)}MB")
+            return False, warnings
+
+        # Check compute capability for advanced features
+        if precision in ["int8", "int4"]:
+            warnings.append("Quantized precision may require specific GPU compute capability")
+
+        return True, warnings
+
+    def refresh(self):
+        """Refresh GPU information"""
+        if self.nvidia_smi_available:
+            self.gpus = self._detect_nvidia_gpus()
+        elif self.cuda_available:
+            self.gpus = self._detect_cuda_gpus_fallback()
+
+    def get_system_info(self) -> Dict[str, Any]:
+        """Get comprehensive system information"""
+        info = {
+            "platform": platform.system(),
+            "architecture": platform.machine(),
+            "cuda_available": self.cuda_available,
+            "nvidia_smi_available": self.nvidia_smi_available,
+            "gpu_count": len(self.gpus),
+            "gpus": [
+                {
+                    "id": gpu.gpu_id,
+                    "name": gpu.name,
+                    "memory_total_mb": gpu.memory_total,
+                    "memory_free_mb": gpu.memory_free,
+                    "memory_used_mb": gpu.memory_used,
+                    "utilization_percent": gpu.utilization,
+                    "temperature_c": gpu.temperature,
+                    "power_draw_w": gpu.power_draw,
+                    "driver_version": gpu.driver_version,
+                    "cuda_version": gpu.cuda_version
+                }
+                for gpu in self.gpus
+            ]
+        }
+
+        # Add Python environment info
+        try:
+            import torch
+            info["torch_version"] = torch.__version__
+            info["torch_cuda_version"] = torch.version.cuda
+        except ImportError:
+            pass
+
+        return info
+
+
+# Global GPU manager instance
+_gpu_manager = None
+
+def get_gpu_manager() -> GPUManager:
+    """Get global GPU manager instance"""
+    global _gpu_manager
+    if _gpu_manager is None:
+        _gpu_manager = GPUManager()
+    return _gpu_manager
+
+
+def detect_gpus() -> List[GPUInfo]:
+    """Convenience function to detect GPUs"""
+    return get_gpu_manager().gpus
+
+
+def get_best_gpu(min_memory_mb: int = 1024) -> Optional[GPUInfo]:
+    """Convenience function to get best available GPU"""
+    return get_gpu_manager().get_best_gpu(min_memory_mb)
+
+
+def check_cuda_availability() -> bool:
+    """Check if CUDA is available"""
+    return get_gpu_manager().cuda_available
+
+
+def estimate_model_memory(model_id: str, precision: str = "float16") -> int:
+    """Estimate model memory requirements"""
+    return get_gpu_manager().estimate_model_memory(model_id, precision)
{isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: isa_model
-Version: 0.4.0
+Version: 0.4.3
 Summary: Unified AI model serving framework
 Author: isA_Model Contributors
 Classifier: Development Status :: 3 - Alpha
@@ -10,45 +10,87 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: fastapi>=0.95.0
-Requires-Dist: numpy>=1.20.0
-Requires-Dist: httpx>=0.23.0
-Requires-Dist: pydantic>=2.0.0
 Requires-Dist: uvicorn>=0.22.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: httpx>=0.23.0
 Requires-Dist: requests>=2.28.0
 Requires-Dist: aiohttp>=3.8.0
-Requires-Dist: transformers>=4.30.0
-Requires-Dist: langchain-core>=0.1.0
-Requires-Dist: huggingface-hub>=0.16.0
-Requires-Dist: kubernetes>=25.3.0
-Requires-Dist: mlflow>=2.4.0
-Requires-Dist: torch>=2.0.0
-Requires-Dist: openai>=1.10.0
-Requires-Dist: replicate>=0.23.0
 Requires-Dist: python-dotenv>=1.0.0
-Requires-Dist:
-Requires-Dist: runpod>=1.0.0
-Requires-Dist: boto3>=1.26.0
-Requires-Dist: google-cloud-storage>=2.7.0
-Requires-Dist: datasets>=2.10.0
-Requires-Dist: accelerate>=0.20.0
-Requires-Dist: bitsandbytes>=0.39.0
-Requires-Dist: peft>=0.4.0
-Requires-Dist: trl>=0.4.0
+Requires-Dist: numpy>=1.20.0
 Requires-Dist: supabase>=2.0.0
-Requires-Dist: pgvector>=0.2.0
 Requires-Dist: psycopg2-binary>=2.9.0
 Requires-Dist: asyncpg>=0.28.0
 Requires-Dist: slowapi>=0.1.8
-Requires-Dist: redis>=4.5.0
 Requires-Dist: circuitbreaker>=1.3.2
-Requires-Dist: prometheus-fastapi-instrumentator>=6.1.0
 Requires-Dist: structlog>=23.1.0
+Requires-Dist: psutil>=5.9.0
+Requires-Dist: redis>=4.5.0
+Requires-Dist: tenacity>=8.2.0
+Provides-Extra: cloud
+Requires-Dist: openai>=1.10.0; extra == "cloud"
+Requires-Dist: replicate>=0.23.0; extra == "cloud"
+Requires-Dist: cerebras-cloud-sdk>=1.0.0; extra == "cloud"
+Requires-Dist: modal>=0.63.0; extra == "cloud"
+Requires-Dist: grpclib>=0.4.7; extra == "cloud"
+Requires-Dist: python-logging-loki>=0.3.1; extra == "cloud"
+Requires-Dist: huggingface-hub>=0.16.0; extra == "cloud"
+Requires-Dist: docker>=6.0.0; extra == "cloud"
+Requires-Dist: influxdb-client>=1.36.0; extra == "cloud"
+Requires-Dist: tiktoken>=0.5.0; extra == "cloud"
+Provides-Extra: local
+Requires-Dist: torch>=2.0.0; extra == "local"
+Requires-Dist: transformers>=4.30.0; extra == "local"
+Requires-Dist: accelerate>=0.20.0; extra == "local"
+Requires-Dist: huggingface-hub>=0.16.0; extra == "local"
+Requires-Dist: safetensors>=0.4.1; extra == "local"
+Requires-Dist: sentencepiece>=0.1.99; extra == "local"
+Provides-Extra: training
+Requires-Dist: datasets>=2.10.0; extra == "training"
+Requires-Dist: peft>=0.4.0; extra == "training"
+Requires-Dist: trl>=0.4.0; extra == "training"
+Requires-Dist: bitsandbytes>=0.39.0; extra == "training"
+Provides-Extra: audio
+Requires-Dist: librosa>=0.10.1; extra == "audio"
+Requires-Dist: soundfile>=0.12.1; extra == "audio"
+Requires-Dist: numba>=0.57.0; extra == "audio"
+Provides-Extra: vision
+Requires-Dist: Pillow>=10.0.1; extra == "vision"
+Requires-Dist: torchvision>=0.15.2; extra == "vision"
+Provides-Extra: langchain
+Requires-Dist: langchain-core>=0.1.0; extra == "langchain"
+Requires-Dist: langchain-openai>=0.0.2; extra == "langchain"
+Provides-Extra: storage
+Requires-Dist: boto3>=1.26.0; extra == "storage"
+Requires-Dist: google-cloud-storage>=2.7.0; extra == "storage"
+Provides-Extra: monitoring
+Requires-Dist: mlflow>=2.4.0; extra == "monitoring"
+Requires-Dist: redis>=4.5.0; extra == "monitoring"
+Requires-Dist: prometheus-fastapi-instrumentator>=6.1.0; extra == "monitoring"
+Requires-Dist: influxdb-client>=1.36.0; extra == "monitoring"
+Requires-Dist: pgvector>=0.2.0; extra == "monitoring"
+Requires-Dist: python-logging-loki>=0.3.1; extra == "monitoring"
+Provides-Extra: k8s
+Requires-Dist: kubernetes>=25.3.0; extra == "k8s"
+Provides-Extra: gpu-cloud
+Requires-Dist: runpod>=1.0.0; extra == "gpu-cloud"
+Requires-Dist: ollama>=0.3.0; extra == "gpu-cloud"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: black>=22.0.0; extra == "dev"
 Requires-Dist: flake8>=4.0.0; extra == "dev"
 Requires-Dist: mypy>=0.991; extra == "dev"
 Requires-Dist: twine>=4.0.0; extra == "dev"
+Provides-Extra: api-only
+Requires-Dist: isa-model[cloud,langchain]; extra == "api-only"
+Provides-Extra: full-local
+Requires-Dist: isa-model[audio,langchain,local,training,vision]; extra == "full-local"
+Provides-Extra: production
+Requires-Dist: isa-model[cloud,k8s,monitoring,storage]; extra == "production"
+Provides-Extra: staging
+Requires-Dist: isa-model[cloud,langchain,monitoring,storage]; extra == "staging"
+Requires-Dist: python-consul>=1.1.0; extra == "staging"
+Provides-Extra: all
+Requires-Dist: isa-model[audio,cloud,gpu-cloud,k8s,langchain,local,monitoring,storage,training,vision]; extra == "all"
 
 # isa_model_sdk - Unified AI Model Serving Framework
 
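The 0.4.3 metadata turns the heavy ML and provider dependencies into optional extras, so a bare pip install isa-model no longer pulls in torch, transformers, or the cloud SDKs. A sketch of what that means at import time (a feature-flag pattern, not code from the package):

# With the extras split above, cloud and local-inference dependencies are
# only importable when the matching extra was installed.
try:
    import openai  # pulled in by: pip install "isa-model[cloud]"
    HAS_CLOUD = True
except ImportError:
    HAS_CLOUD = False

try:
    import torch  # pulled in by: pip install "isa-model[local]"
    HAS_LOCAL = True
except ImportError:
    HAS_LOCAL = False

print(f"cloud providers: {HAS_CLOUD}, local inference: {HAS_LOCAL}")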