isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,318 @@
|
|
1
|
+
"""
|
2
|
+
Server Startup Initialization for ISA Model
|
3
|
+
|
4
|
+
Handles automatic initialization of:
|
5
|
+
- Database migrations
|
6
|
+
- Model registry population
|
7
|
+
- Embedding generation
|
8
|
+
- System validation
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
import asyncio
|
13
|
+
from typing import Dict, Any
|
14
|
+
import json
|
15
|
+
import os
|
16
|
+
|
17
|
+
from ...core.config.config_manager import ConfigManager
|
18
|
+
from ...core.models.model_repo import ModelRegistry
|
19
|
+
from ...core.types import ModelType, ModelCapability
|
20
|
+
|
21
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
22
|
+
|
23
|
+
class StartupInitializer:
    """Run the initialization steps the server needs at startup.

    The steps run in this order: populate the model registry from the
    configured model definitions, generate embeddings for registered models
    that do not have one yet, then validate that the registry is usable.
    """

    def __init__(self):
        self.config_manager = ConfigManager()
        # Tracked so cleanup() can release them if startup is interrupted.
        self._embedding_service = None
        self._model_registry = None

    async def initialize_system(self):
        """Run complete system initialization.

        Raises:
            Exception: whatever a step raised, re-raised after being logged.
        """
        print("🚀 Starting ISA Model system initialization...")

        try:
            # 1. Populate model registry
            await self._populate_models()

            # 2. Generate embeddings
            await self._generate_embeddings()

            # 3. Validate system
            await self._validate_system()

            print("✅ System initialization completed successfully!")

        except Exception as e:
            logger.error(f"❌ System initialization failed: {e}")
            raise

    async def _populate_models(self):
        """Populate model registry with all configured models.

        Does nothing when the registry already reports at least one model,
        or when no model definitions are configured. A failure to register
        a single model is logged and does not stop the remaining ones.
        """
        print("📚 Populating model registry...")

        try:
            registry = ModelRegistry()
            self._model_registry = registry  # Track for cleanup

            # Check if models are already populated to avoid unnecessary
            # database operations. A failed check is not fatal: fall through
            # and populate anyway.
            try:
                stats = registry.get_stats()
                if stats and stats.get('total_models', 0) > 0:
                    print(f"✅ Model registry already populated: {stats['total_models']} models")
                    return
            except Exception as e:
                print(f"⚠️ Could not check existing models, proceeding with population: {e}")

            # Get all configured models
            all_models = self.config_manager.model_definitions

            if not all_models:
                print("⚠️ No models configured in providers")
                return

            registered_count = 0

            for model_id, model_data in all_models.items():
                try:
                    # No per-model existence check here: the registry-wide
                    # check above already ran, and this avoids one query
                    # per model.
                    model_type = self._map_model_type(model_data.get('type', 'llm'))
                    capabilities = self._map_capabilities(model_data.get('capabilities', []))
                    provider = model_data.get('provider', 'unknown')

                    success = registry.register_model(
                        model_id=model_id,
                        model_type=model_type,
                        capabilities=capabilities,
                        metadata=model_data,
                        provider=provider
                    )

                    if success:
                        registered_count += 1
                    else:
                        logger.warning(f"Failed to register {model_id}")

                except Exception as e:
                    logger.error(f"Error registering {model_id}: {e}")
                    continue

            print(f"✅ Model registry populated: {registered_count}/{len(all_models)} models")

        except Exception as e:
            logger.error(f"❌ Model population error: {e}")
            raise

    async def _generate_embeddings(self):
        """Generate embeddings for all registered models using OpenAI embedding service.

        The embedding service is closed on every exit path once it has been
        obtained, and the tracked reference is cleared so that cleanup()
        does not close it a second time.

        Raises:
            Exception: any error outside the per-model loop, re-raised after
                being logged.
        """
        print("🧠 Generating model embeddings...")

        try:
            # Imported here rather than at module level, as in the original.
            from ...inference.ai_factory import AIFactory
            factory = AIFactory.get_instance()
            embedding_service = factory.get_embed("text-embedding-3-small", "openai")
            self._embedding_service = embedding_service  # Track for cleanup

            if not embedding_service:
                print("⚠️ Could not initialize embedding service, skipping embedding generation")
                return

            try:
                await self._embed_missing_models(embedding_service)
            finally:
                # Clear the tracked reference first so cleanup() never
                # closes the same service again.
                self._embedding_service = None
                await embedding_service.close()

        except Exception as e:
            logger.error(f"❌ Embedding generation error: {e}")
            raise

    async def _embed_missing_models(self, embedding_service):
        """Create and store an embedding for each model that lacks one."""
        # Get all registered models
        registry = ModelRegistry()
        models = registry.list_models()

        if not models:
            print("⚠️ No models found in registry")
            return

        # Check existing embeddings using Supabase client
        supabase_client = registry.supabase_client
        existing_result = supabase_client.table("model_embeddings").select("model_id").execute()
        # Tolerate a result whose data is None instead of a list.
        existing_embeddings = {row['model_id'] for row in (existing_result.data or [])}

        processed = 0

        for model_id, model_data in models.items():
            # Skip if embedding already exists
            if model_id in existing_embeddings:
                continue

            try:
                provider = model_data.get('provider', 'unknown')
                metadata = model_data.get('metadata') or {}
                search_text = self._build_search_text(model_id, provider, metadata)

                # Generate embedding using OpenAI service
                embedding = await embedding_service.create_text_embedding(search_text)

                # Store embedding in database
                embedding_data = {
                    'model_id': model_id,
                    'provider': provider,
                    'description': search_text,
                    'embedding': embedding
                }

                result = supabase_client.table('model_embeddings').insert(embedding_data).execute()

                if result.data:
                    processed += 1
                else:
                    logger.warning(f"Failed to store embedding for {model_id}")

            except Exception as e:
                # One failing model must not stop the remaining ones.
                logger.error(f"Error creating embedding for {model_id}: {e}")
                continue

        print(f"✅ Generated {processed}/{len(models)} new embeddings")

    @staticmethod
    def _build_search_text(model_id, provider, metadata):
        """Combine id, provider, description and specialized tasks into one text."""
        search_text = f"{model_id} {provider} model. "

        description = metadata.get('description', '')
        if description:
            search_text += f"{description} "

        specialized_tasks = metadata.get('specialized_tasks', [])
        if specialized_tasks:
            search_text += f"Specialized for: {', '.join(specialized_tasks)}"

        return search_text

    async def _validate_system(self):
        """Validate system is working correctly.

        Prints the registry statistics and fails when the registry is empty.
        A failure of the intelligent model selector is only logged as a
        warning.

        Raises:
            RuntimeError: when the registry reports zero models.
            Exception: any other error from the registry, re-raised after
                being logged.
        """
        print("🔍 Validating system...")

        try:
            registry = ModelRegistry()
            stats = registry.get_stats()

            print("📊 System validation results:")
            print(f" Models: {stats['total_models']}")
            print(f" By type: {stats['models_by_type']}")
            print(f" By capability: {stats['models_by_capability']}")

            if stats['total_models'] == 0:
                raise RuntimeError("No models found in registry")

            # Initialize and test intelligent selector
            try:
                from ...core.services.intelligent_model_selector import get_model_selector
                selector = await get_model_selector()

                # Test basic functionality
                available_models = await selector.get_available_models()
                print(f" Available models for selection: {len(available_models)}")

            except Exception as e:
                logger.warning(f"⚠️ Intelligent selector initialization failed: {e}")

            print("✅ System validation completed")

        except Exception as e:
            logger.error(f"❌ System validation error: {e}")
            raise

    def _map_model_type(self, model_type_str: str) -> ModelType:
        """Map string model type to enum.

        Unknown, empty or non-string values map to ``ModelType.LLM``.
        """
        mapping = {
            'llm': ModelType.LLM,
            'embedding': ModelType.EMBEDDING,
            'rerank': ModelType.RERANK,
            'image': ModelType.IMAGE,
            'audio': ModelType.AUDIO,
            'video': ModelType.VIDEO,
            'vision': ModelType.VISION,
            'omni': ModelType.LLM  # Omni models are treated as LLM for now
        }
        # A config entry with an explicit null type would otherwise raise
        # AttributeError on .lower().
        key = model_type_str.lower() if isinstance(model_type_str, str) else ''
        return mapping.get(key, ModelType.LLM)

    def _map_capabilities(self, capabilities_list: list) -> list:
        """Map capability strings to enums.

        Unknown capability names are logged and skipped. When nothing maps
        (including an empty or ``None`` input) the result is
        ``[ModelCapability.TEXT_GENERATION]``.
        """
        mapping = {
            'text_generation': ModelCapability.TEXT_GENERATION,
            'chat': ModelCapability.CHAT,
            'embedding': ModelCapability.EMBEDDING,
            'reranking': ModelCapability.RERANKING,
            'reasoning': ModelCapability.REASONING,
            'image_generation': ModelCapability.IMAGE_GENERATION,
            'image_analysis': ModelCapability.IMAGE_ANALYSIS,
            'audio_transcription': ModelCapability.AUDIO_TRANSCRIPTION,
            'audio_realtime': ModelCapability.AUDIO_REALTIME,
            'speech_to_text': ModelCapability.SPEECH_TO_TEXT,
            'text_to_speech': ModelCapability.TEXT_TO_SPEECH,
            'conversation': ModelCapability.CONVERSATION,
            'image_understanding': ModelCapability.IMAGE_UNDERSTANDING,
            'ui_detection': ModelCapability.UI_DETECTION,
            'ocr': ModelCapability.OCR,
            'table_detection': ModelCapability.TABLE_DETECTION,
            'table_structure_recognition': ModelCapability.TABLE_STRUCTURE_RECOGNITION
        }

        result = []
        for cap in capabilities_list or []:
            if cap in mapping:
                result.append(mapping[cap])
            else:
                # Log unmapped capabilities for debugging
                logger.warning(f"Unknown capability '{cap}' - skipping")

        # Default to text generation if no capabilities
        if not result:
            result = [ModelCapability.TEXT_GENERATION]

        return result

    async def cleanup(self):
        """Clean up startup resources.

        Closes the tracked embedding service if one is still open. Errors
        are logged and never raised.
        """
        logger.info("🧹 Starting startup initializer cleanup...")

        try:
            embedding_service = getattr(self, '_embedding_service', None)
            if embedding_service:
                # Clear the reference first so a repeated cleanup() call
                # does not close the same service twice.
                self._embedding_service = None
                try:
                    await embedding_service.close()
                    logger.info("✅ Embedding service closed")
                except Exception as e:
                    logger.error(f"❌ Error closing embedding service: {e}")

            # ModelRegistry doesn't need explicit cleanup currently; this is
            # where it would be added if that changes.

            logger.info("✅ Startup initializer cleanup completed")

        except Exception as e:
            logger.error(f"❌ Error during startup cleanup: {e}")
|
311
|
+
|
312
|
+
|
313
|
+
# Global initializer instance
|
314
|
+
# Shared module-level instance; it is constructed when this module is imported.
startup_initializer = StartupInitializer()
|
315
|
+
|
316
|
+
async def run_startup_initialization():
    """Run the full startup sequence on the shared module-level initializer."""
    initializer = startup_initializer
    await initializer.initialize_system()
|
@@ -0,0 +1,249 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Modal Services Proxy Server (Port 8082)
|
4
|
+
|
5
|
+
This server acts as a proxy to Modal services, providing a unified interface
|
6
|
+
for all Modal-deployed AI services like vision, audio, embedding, etc.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import os
|
10
|
+
import logging
|
11
|
+
import uvicorn
|
12
|
+
import httpx
|
13
|
+
import asyncio
|
14
|
+
from fastapi import FastAPI, HTTPException, Request, Depends
|
15
|
+
from fastapi.middleware.cors import CORSMiddleware
|
16
|
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
17
|
+
from typing import Dict, Any, Optional
|
18
|
+
import json
|
19
|
+
|
20
|
+
# Logging: INFO-level root configuration plus a logger for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The FastAPI application that all routes below attach to.
_app_metadata = {
    "title": "isA Model Modal Proxy",
    "description": "Proxy server for Modal-deployed AI services",
    "version": "1.0.0",
}
app = FastAPI(**_app_metadata)

# CORS: every origin, method and header is allowed, with credentials.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Bearer-token scheme used by verify_api_key.
security = HTTPBearer()
|
41
|
+
|
42
|
+
# Configuration, read from the environment once at import time.
# Each pair is (service name used in routes, environment variable holding its URL).
_SERVICE_ENV_VARS = (
    ("vision", "MODAL_VISION_URL"),
    ("audio", "MODAL_AUDIO_URL"),
    ("embedding", "MODAL_EMBED_URL"),
    ("image_gen", "MODAL_IMAGE_GEN_URL"),
)

# An unset variable leaves the service mapped to an empty string.
MODAL_SERVICES = {
    service: os.environ.get(env_var, "") for service, env_var in _SERVICE_ENV_VARS
}

# An empty API_KEY means no key comparison is performed.
API_KEY = os.environ.get("API_KEY", "")
# Timeout, in seconds, passed to the HTTP client for proxied requests.
REQUEST_TIMEOUT = int(os.environ.get("MODAL_TIMEOUT", "120"))
|
52
|
+
|
53
|
+
|
54
|
+
def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify API key if configured.

    When ``API_KEY`` is empty no comparison is made and any bearer token is
    accepted.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The bearer token that was supplied.

    Raises:
        HTTPException: 401 when ``API_KEY`` is set and the token differs.
    """
    import hmac

    supplied = credentials.credentials

    # Constant-time comparison, so response timing does not reveal how much
    # of the key matched. Encoding to bytes is needed because
    # compare_digest rejects non-ASCII str arguments.
    if API_KEY and not hmac.compare_digest(
        supplied.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(
            status_code=401,
            detail="Invalid API key",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return supplied
|
62
|
+
|
63
|
+
|
64
|
+
@app.get("/health")
async def health_check():
    """Report proxy health plus the reachability of each Modal service.

    Each configured service is probed at ``<url>/health`` with a 5 second
    timeout. The proxy's own status is always reported as "healthy".
    """

    async def probe(client, base_url):
        # A service with no URL is reported without any network call.
        if not base_url:
            return "not_configured"
        try:
            reply = await client.get(f"{base_url}/health", timeout=5.0)
        except Exception:
            return "unreachable"
        if reply.status_code == 200:
            return "healthy"
        return "unhealthy"

    modal_services = {}
    async with httpx.AsyncClient(timeout=5.0) as client:
        # Probed one after another, in the order MODAL_SERVICES defines.
        for name, base_url in MODAL_SERVICES.items():
            modal_services[name] = await probe(client, base_url)

    report = {
        "status": "healthy",
        "service": "modal-proxy",
        "port": 8082,
        "modal_services": modal_services,
    }
    return report
|
86
|
+
|
87
|
+
|
88
|
+
@app.get("/services")
async def list_services():
    """Return all known service names and the URLs of the configured ones."""
    # Only services with a non-empty URL count as configured.
    configured = {name: url for name, url in MODAL_SERVICES.items() if url}
    return {
        "available_services": list(MODAL_SERVICES),
        "service_urls": configured,
        "total_services": len(configured),
    }
|
96
|
+
|
97
|
+
|
98
|
+
@app.post("/modal/{service_name}/{endpoint:path}")
async def proxy_modal_service(
    service_name: str,
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Proxy requests to specific Modal service.

    The incoming method, body, query parameters and headers (except
    ``host`` and ``content-length``) are forwarded to
    ``<service url>/<endpoint>``.

    Args:
        service_name: Key into ``MODAL_SERVICES``.
        endpoint: Path on the upstream service.
        request: The incoming request being forwarded.
        api_key: Result of ``verify_api_key``; not used in the body.

    Returns:
        The parsed JSON body when the upstream content type starts with
        ``application/json``, otherwise the response text. When the upstream
        status is 400 or above, the same payload is returned in a
        ``JSONResponse`` carrying that status instead of an implicit 200.

    Raises:
        HTTPException: 404 for an unknown service, 503 for an unconfigured
            or unreachable one, 504 on timeout, 500 on any other error.
    """
    from fastapi.responses import JSONResponse

    # Validate service name
    if service_name not in MODAL_SERVICES:
        raise HTTPException(
            status_code=404,
            detail=f"Service '{service_name}' not found. Available: {list(MODAL_SERVICES.keys())}"
        )

    service_url = MODAL_SERVICES[service_name]
    if not service_url:
        raise HTTPException(
            status_code=503,
            detail=f"Service '{service_name}' not configured"
        )

    try:
        # Get request body
        body = await request.body()

        # Prepare headers (exclude host and content-length); the HTTP client
        # sets both for the upstream request.
        # NOTE(review): the caller's Authorization header is forwarded
        # upstream unchanged - confirm that is intended.
        headers = {
            key: value
            for key, value in request.headers.items()
            if key.lower() not in ('host', 'content-length')
        }

        # Strip a trailing slash so a configured "https://host/" does not
        # produce "https://host//endpoint".
        target_url = f"{service_url.rstrip('/')}/{endpoint}"

        # Make request to Modal service
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
            response = await client.request(
                method=request.method,
                url=target_url,
                headers=headers,
                content=body,
                params=dict(request.query_params)
            )

        content_type = response.headers.get('content-type', '')
        if content_type.startswith('application/json'):
            payload = response.json()
        else:
            payload = response.text

        # Without this, an upstream 4xx/5xx would reach the caller as 200.
        if response.status_code >= 400:
            return JSONResponse(status_code=response.status_code, content=payload)

        return payload

    except httpx.TimeoutException:
        logger.error(f"Timeout calling Modal service {service_name} at {endpoint}")
        raise HTTPException(
            status_code=504,
            detail=f"Modal service '{service_name}' timeout"
        )
    except httpx.RequestError as e:
        logger.error(f"Request error calling Modal service {service_name}: {e}")
        raise HTTPException(
            status_code=503,
            detail=f"Modal service '{service_name}' unavailable: {str(e)}"
        )
    except Exception as e:
        logger.error(f"Unexpected error calling Modal service {service_name}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal error: {str(e)}"
        )
|
164
|
+
|
165
|
+
|
166
|
+
@app.get("/modal/{service_name}/{endpoint:path}")
async def proxy_modal_service_get(
    service_name: str,
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Handle GET on the generic proxy route by delegating to proxy_modal_service."""
    forwarded = await proxy_modal_service(
        service_name=service_name,
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return forwarded
|
175
|
+
|
176
|
+
|
177
|
+
# Convenience endpoints for common services
|
178
|
+
@app.post("/vision/{endpoint:path}")
async def vision_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Shortcut route: forward the request to the "vision" service."""
    forwarded = await proxy_modal_service(
        service_name="vision",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return forwarded
|
186
|
+
|
187
|
+
|
188
|
+
@app.post("/audio/{endpoint:path}")
async def audio_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Shortcut route: forward the request to the "audio" service."""
    forwarded = await proxy_modal_service(
        service_name="audio",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return forwarded
|
196
|
+
|
197
|
+
|
198
|
+
@app.post("/embedding/{endpoint:path}")
async def embedding_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Shortcut route: forward the request to the "embedding" service."""
    forwarded = await proxy_modal_service(
        service_name="embedding",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return forwarded
|
206
|
+
|
207
|
+
|
208
|
+
@app.post("/image-gen/{endpoint:path}")
async def image_gen_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Shortcut route: forward the request to the "image_gen" service."""
    forwarded = await proxy_modal_service(
        service_name="image_gen",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return forwarded
|
216
|
+
|
217
|
+
|
218
|
+
# Error handlers
|
219
|
+
@app.exception_handler(404)
async def not_found_handler(request: Request, exc: HTTPException):
    """Answer 404s with a JSON body that lists the available endpoints.

    An exception handler must return a ``Response`` object. Returning a
    plain dict, as this handler did before, turns every 404 into a server
    error, so the payload is wrapped in a ``JSONResponse`` with status 404.
    """
    from fastapi.responses import JSONResponse

    return JSONResponse(
        status_code=404,
        content={
            "error": "Not Found",
            "detail": "The requested endpoint was not found",
            "available_endpoints": [
                "/health",
                "/services",
                "/modal/{service_name}/{endpoint}",
                "/vision/{endpoint}",
                "/audio/{endpoint}",
                "/embedding/{endpoint}",
                "/image-gen/{endpoint}"
            ]
        },
    )
|
234
|
+
|
235
|
+
|
236
|
+
if __name__ == "__main__":
    # Script entry point: read PORT and WORKERS from the environment and
    # start uvicorn on all interfaces.
    port = int(os.getenv("PORT", "8082"))
    workers = int(os.getenv("WORKERS", "1"))

    logger.info(f"Starting Modal Proxy Server on port {port}")
    logger.info(f"Configured Modal services: {list(MODAL_SERVICES.keys())}")

    # uvicorn exits when given an app object together with workers > 1,
    # because worker processes must import the app themselves. Pass an
    # import string in that case and keep the object for a single worker.
    if workers > 1:
        if __spec__ is not None:
            # Started with "python -m <package path>".
            module_name = __spec__.name
        else:
            # Started as a plain script; the module is importable by its
            # file name because the script directory is on sys.path.
            module_name = os.path.splitext(os.path.basename(__file__))[0]
        app_target = f"{module_name}:app"
    else:
        app_target = app

    uvicorn.run(
        app_target,
        host="0.0.0.0",
        port=port,
        workers=workers,
        log_level="info"
    )
|