isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,318 @@
1
+ """
2
+ Server Startup Initialization for ISA Model
3
+
4
+ Handles automatic initialization of:
5
+ - Database migrations
6
+ - Model registry population
7
+ - Embedding generation
8
+ - System validation
9
+ """
10
+
11
+ import logging
12
+ import asyncio
13
+ from typing import Dict, Any
14
+ import json
15
+ import os
16
+
17
+ from ...core.config.config_manager import ConfigManager
18
+ from ...core.models.model_repo import ModelRegistry
19
+ from ...core.types import ModelType, ModelCapability
20
+
21
logger = logging.getLogger(__name__)


class StartupInitializer:
    """Run the initialization steps required when the server starts.

    :meth:`initialize_system` executes, in order: model-registry population,
    embedding generation, and system validation. :meth:`cleanup` releases any
    resources that are still held afterwards.
    """

    def __init__(self):
        self.config_manager = ConfigManager()
        # Both are tracked only so that cleanup() can release them later.
        self._embedding_service = None
        self._model_registry = None

    async def initialize_system(self):
        """Run complete system initialization.

        Raises:
            Exception: whatever a step raised, re-raised after being logged.
        """
        print("🚀 Starting ISA Model system initialization...")

        try:
            # 1. Populate model registry
            await self._populate_models()

            # 2. Generate embeddings
            await self._generate_embeddings()

            # 3. Validate system
            await self._validate_system()

            print("✅ System initialization completed successfully!")

        except Exception as e:
            logger.error(f"❌ System initialization failed: {e}")
            raise

    async def _populate_models(self):
        """Populate the model registry with all configured models.

        Does nothing when the registry already reports at least one model or
        when no model definitions are configured. A failure to register one
        model is logged and does not stop the remaining registrations.

        Raises:
            Exception: any error outside the per-model loop, after logging.
        """
        print("📚 Populating model registry...")

        try:
            registry = ModelRegistry()
            self._model_registry = registry  # Track for cleanup

            # Check if models are already populated to avoid unnecessary
            # database operations. A failing check is not fatal: we fall
            # through and attempt population anyway.
            try:
                stats = registry.get_stats()
                if stats and stats.get('total_models', 0) > 0:
                    print(f"✅ Model registry already populated: {stats['total_models']} models")
                    return
            except Exception as e:
                print(f"⚠️ Could not check existing models, proceeding with population: {e}")

            # Get all configured models
            all_models = self.config_manager.model_definitions

            if not all_models:
                print("⚠️ No models configured in providers")
                return

            registered_count = 0

            for model_id, model_data in all_models.items():
                try:
                    # No per-model existence check here: the registry-level
                    # check above already covered it with a single query.
                    model_type = self._map_model_type(model_data.get('type', 'llm'))
                    capabilities = self._map_capabilities(model_data.get('capabilities', []))
                    provider = model_data.get('provider', 'unknown')

                    success = registry.register_model(
                        model_id=model_id,
                        model_type=model_type,
                        capabilities=capabilities,
                        metadata=model_data,
                        provider=provider
                    )

                    if success:
                        registered_count += 1
                    else:
                        logger.warning(f"Failed to register {model_id}")

                except Exception as e:
                    logger.error(f"Error registering {model_id}: {e}")
                    continue

            print(f"✅ Model registry populated: {registered_count}/{len(all_models)} models")

        except Exception as e:
            logger.error(f"❌ Model population error: {e}")
            raise

    async def _generate_embeddings(self):
        """Generate and store embeddings for registered models.

        Models that already have a row in the ``model_embeddings`` table are
        skipped. A failure for one model is logged and does not stop the
        others. The embedding service is closed on every exit path once it
        has been obtained, and the tracked reference is cleared so that
        :meth:`cleanup` does not close it a second time.

        Raises:
            Exception: any error outside the per-model loop, after logging.
        """
        print("🧠 Generating model embeddings...")

        try:
            # Initialize embedding service
            from ...inference.ai_factory import AIFactory
            factory = AIFactory.get_instance()
            embedding_service = factory.get_embed("text-embedding-3-small", "openai")
            self._embedding_service = embedding_service  # Track for cleanup

            if not embedding_service:
                print("⚠️ Could not initialize embedding service, skipping embedding generation")
                return

            try:
                # Get all registered models
                registry = ModelRegistry()
                models = registry.list_models()

                if not models:
                    print("⚠️ No models found in registry")
                    return

                # Check existing embeddings using Supabase client
                supabase_client = registry.supabase_client
                existing_result = supabase_client.table("model_embeddings").select("model_id").execute()
                # `data` may be None/empty when the table has no rows.
                existing_embeddings = {row['model_id'] for row in (existing_result.data or [])}

                processed = 0

                for model_id, model_data in models.items():
                    try:
                        # Skip if embedding already exists
                        if model_id in existing_embeddings:
                            continue

                        provider = model_data.get('provider', 'unknown')
                        # Tolerate an explicit null stored under 'metadata'.
                        metadata = model_data.get('metadata') or {}

                        # Create searchable text from model information
                        # (same logic as intelligent_model_selector)
                        description = metadata.get('description', '')
                        specialized_tasks = metadata.get('specialized_tasks', [])

                        # Combine all text for embedding
                        search_text = f"{model_id} {provider} model. "
                        if description:
                            search_text += f"{description} "
                        if specialized_tasks:
                            search_text += f"Specialized for: {', '.join(specialized_tasks)}"

                        # Generate embedding using the embedding service
                        embedding = await embedding_service.create_text_embedding(search_text)

                        # Store embedding in database
                        embedding_data = {
                            'model_id': model_id,
                            'provider': provider,
                            'description': search_text,
                            'embedding': embedding
                        }

                        result = supabase_client.table('model_embeddings').insert(embedding_data).execute()

                        if result.data:
                            processed += 1
                        else:
                            logger.warning(f"Failed to store embedding for {model_id}")

                    except Exception as e:
                        logger.error(f"Error creating embedding for {model_id}: {e}")
                        continue

                print(f"✅ Generated {processed}/{len(models)} new embeddings")

            finally:
                # Clear the tracked reference first so cleanup() never sees a
                # service that is closed (or half-closed) already.
                self._embedding_service = None
                await embedding_service.close()

        except Exception as e:
            logger.error(f"❌ Embedding generation error: {e}")
            raise

    async def _validate_system(self):
        """Validate that the registry holds models and the selector loads.

        A failure to initialize the intelligent selector is logged as a
        warning only; an empty registry is an error.

        Raises:
            RuntimeError: if the registry reports zero models.
            Exception: any other validation error, after logging.
        """
        print("🔍 Validating system...")

        try:
            registry = ModelRegistry()
            stats = registry.get_stats()

            print("📊 System validation results:")
            print(f" Models: {stats['total_models']}")
            print(f" By type: {stats['models_by_type']}")
            print(f" By capability: {stats['models_by_capability']}")

            if stats['total_models'] == 0:
                raise RuntimeError("No models found in registry")

            # Initialize and test intelligent selector
            try:
                from ...core.services.intelligent_model_selector import get_model_selector
                selector = await get_model_selector()

                # Test basic functionality
                available_models = await selector.get_available_models()
                print(f" Available models for selection: {len(available_models)}")

            except Exception as e:
                logger.warning(f"⚠️ Intelligent selector initialization failed: {e}")

            print("✅ System validation completed")

        except Exception as e:
            logger.error(f"❌ System validation error: {e}")
            raise

    def _map_model_type(self, model_type_str: str) -> ModelType:
        """Map a model-type string to its ``ModelType`` member.

        Matching is case-insensitive. Unknown, empty or ``None`` values map
        to ``ModelType.LLM``.
        """
        mapping = {
            'llm': ModelType.LLM,
            'embedding': ModelType.EMBEDDING,
            'rerank': ModelType.RERANK,
            'image': ModelType.IMAGE,
            'audio': ModelType.AUDIO,
            'video': ModelType.VIDEO,
            'vision': ModelType.VISION,
            'omni': ModelType.LLM  # Omni models are treated as LLM for now
        }
        # `or 'llm'` guards against an explicit null in the configuration,
        # which would otherwise fail on `.lower()`.
        return mapping.get(str(model_type_str or 'llm').lower(), ModelType.LLM)

    def _map_capabilities(self, capabilities_list: list) -> list:
        """Map capability strings to ``ModelCapability`` members.

        Unknown strings are logged and skipped. When nothing maps (including
        an empty or ``None`` input) the result is
        ``[ModelCapability.TEXT_GENERATION]``.
        """
        mapping = {
            'text_generation': ModelCapability.TEXT_GENERATION,
            'chat': ModelCapability.CHAT,
            'embedding': ModelCapability.EMBEDDING,
            'reranking': ModelCapability.RERANKING,
            'reasoning': ModelCapability.REASONING,
            'image_generation': ModelCapability.IMAGE_GENERATION,
            'image_analysis': ModelCapability.IMAGE_ANALYSIS,
            'audio_transcription': ModelCapability.AUDIO_TRANSCRIPTION,
            'audio_realtime': ModelCapability.AUDIO_REALTIME,
            'speech_to_text': ModelCapability.SPEECH_TO_TEXT,
            'text_to_speech': ModelCapability.TEXT_TO_SPEECH,
            'conversation': ModelCapability.CONVERSATION,
            'image_understanding': ModelCapability.IMAGE_UNDERSTANDING,
            'ui_detection': ModelCapability.UI_DETECTION,
            'ocr': ModelCapability.OCR,
            'table_detection': ModelCapability.TABLE_DETECTION,
            'table_structure_recognition': ModelCapability.TABLE_STRUCTURE_RECOGNITION
        }

        result = []
        for cap in capabilities_list or []:
            if cap in mapping:
                result.append(mapping[cap])
            else:
                # Log unmapped capabilities for debugging
                logger.warning(f"Unknown capability '{cap}' - skipping")

        # Default to text generation if no capabilities
        if not result:
            result = [ModelCapability.TEXT_GENERATION]

        return result

    async def cleanup(self):
        """Release resources still held by the initializer.

        Never raises: errors are logged so that shutdown can continue.
        """
        logger.info("🧹 Starting startup initializer cleanup...")

        try:
            # Close the embedding service if one is still tracked. The
            # reference is dropped first so a repeated cleanup() is a no-op.
            service = self._embedding_service
            self._embedding_service = None
            if service:
                try:
                    await service.close()
                    logger.info("✅ Embedding service closed")
                except Exception as e:
                    logger.error(f"❌ Error closing embedding service: {e}")

            # ModelRegistry doesn't need explicit cleanup currently; if that
            # changes, release self._model_registry here.

            logger.info("✅ Startup initializer cleanup completed")

        except Exception as e:
            logger.error(f"❌ Error during startup cleanup: {e}")
311
+
312
+
313
# Shared module-level initializer used by run_startup_initialization()
startup_initializer = StartupInitializer()


async def run_startup_initialization():
    """Run full system initialization on the shared initializer instance."""
    initializer = startup_initializer
    await initializer.initialize_system()
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Modal Services Proxy Server (Port 8082)
4
+
5
+ This server acts as a proxy to Modal services, providing a unified interface
6
+ for all Modal-deployed AI services like vision, audio, embedding, etc.
7
+ """
8
+
9
+ import os
10
+ import logging
11
+ import uvicorn
12
+ import httpx
13
+ import asyncio
14
+ from fastapi import FastAPI, HTTPException, Request, Depends
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
17
+ from typing import Dict, Any, Optional
18
+ import json
19
+
20
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application object
app = FastAPI(
    title="isA Model Modal Proxy",
    description="Proxy server for Modal-deployed AI services",
    version="1.0.0",
)

# CORS: every origin, method and header is allowed, with credentials
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Bearer-token extraction used by verify_api_key
security = HTTPBearer()

# Service name -> environment variable that holds its base URL
_SERVICE_ENV_VARS = (
    ("vision", "MODAL_VISION_URL"),
    ("audio", "MODAL_AUDIO_URL"),
    ("embedding", "MODAL_EMBED_URL"),
    ("image_gen", "MODAL_IMAGE_GEN_URL"),
)

# Service name -> base URL; an empty string means "not configured"
MODAL_SERVICES = {
    service: os.environ.get(env_var, "")
    for service, env_var in _SERVICE_ENV_VARS
}

# Empty API_KEY disables the key comparison in verify_api_key
API_KEY = os.environ.get("API_KEY", "")
# Timeout handed to httpx.AsyncClient for proxied requests
REQUEST_TIMEOUT = int(os.environ.get("MODAL_TIMEOUT", "120"))
52
+
53
+
54
def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify the bearer token against ``API_KEY`` when one is configured.

    Args:
        credentials: bearer credentials extracted by the ``security`` scheme.

    Returns:
        The bearer token string that was supplied.

    Raises:
        HTTPException: 401 when ``API_KEY`` is set and the token differs.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    if API_KEY:
        # Compare as bytes in constant time: `!=` leaks timing information
        # about how many leading characters match, and compare_digest()
        # rejects non-ASCII `str` arguments.
        supplied = credentials.credentials.encode("utf-8")
        expected = API_KEY.encode("utf-8")
        if not hmac.compare_digest(supplied, expected):
            raise HTTPException(
                status_code=401,
                detail="Invalid API key"
            )
    return credentials.credentials
62
+
63
+
64
@app.get("/health")
async def health_check():
    """Report proxy health together with a per-service status map.

    Each entry of ``MODAL_SERVICES`` is reported as ``not_configured`` (empty
    URL), ``healthy`` (its ``/health`` returned 200), ``unhealthy`` (any
    other status code) or ``unreachable`` (the request raised).
    """
    modal_status = {}
    async with httpx.AsyncClient(timeout=5.0) as client:
        for name, base_url in MODAL_SERVICES.items():
            if not base_url:
                modal_status[name] = "not_configured"
                continue

            try:
                probe = await client.get(f"{base_url}/health", timeout=5.0)
                if probe.status_code == 200:
                    modal_status[name] = "healthy"
                else:
                    modal_status[name] = "unhealthy"
            except Exception:
                modal_status[name] = "unreachable"

    payload = {
        "status": "healthy",
        "service": "modal-proxy",
        "port": 8082,
        "modal_services": modal_status,
    }
    return payload
86
+
87
+
88
@app.get("/services")
async def list_services():
    """Return all known service names plus the URLs of the configured ones."""
    # Only services with a non-empty URL count as configured.
    configured = {name: url for name, url in MODAL_SERVICES.items() if url}
    return {
        "available_services": list(MODAL_SERVICES),
        "service_urls": configured,
        "total_services": len(configured),
    }
96
+
97
+
98
@app.post("/modal/{service_name}/{endpoint:path}")
async def proxy_modal_service(
    service_name: str,
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key)
):
    """Proxy a request to a specific Modal service.

    The incoming method, body, query string and end-to-end headers are
    forwarded to ``<service URL>/<endpoint>``. The upstream status code,
    body and content type are returned to the caller unchanged.

    Args:
        service_name: key into ``MODAL_SERVICES``.
        endpoint: path on the upstream service.
        request: the incoming request being proxied.
        api_key: value produced by ``verify_api_key`` (unused here; the
            dependency enforces authentication).

    Raises:
        HTTPException: 404 for an unknown service, 503 when the service has
            no URL or cannot be reached, 504 on upstream timeout, 500 on any
            other error while calling upstream.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi import Response

    # Validate service name
    if service_name not in MODAL_SERVICES:
        raise HTTPException(
            status_code=404,
            detail=f"Service '{service_name}' not found. Available: {list(MODAL_SERVICES.keys())}"
        )

    service_url = MODAL_SERVICES[service_name]
    if not service_url:
        raise HTTPException(
            status_code=503,
            detail=f"Service '{service_name}' not configured"
        )

    # Headers that describe this hop rather than the request itself must not
    # be copied: httpx recomputes host/content-length, and accept-encoding is
    # left to httpx so the upstream only uses encodings httpx can decode.
    # NOTE(review): the Authorization header (the proxy's own API key) is
    # still forwarded upstream, as before - confirm that is intended.
    skipped_headers = {
        'host', 'content-length', 'accept-encoding', 'connection',
        'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te',
        'trailer', 'transfer-encoding', 'upgrade',
    }

    try:
        # Get request body
        body = await request.body()

        headers = {
            key: value
            for key, value in request.headers.items()
            if key.lower() not in skipped_headers
        }

        # Join with exactly one slash regardless of how the URL is configured.
        target_url = f"{service_url.rstrip('/')}/{endpoint.lstrip('/')}"

        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
            response = await client.request(
                method=request.method,
                url=target_url,
                headers=headers,
                content=body,
                # multi_items() keeps repeated keys (?a=1&a=2); dict() would
                # silently keep only the last value.
                params=request.query_params.multi_items()
            )

        # Relay status, body and content type as-is so upstream errors are
        # not reported as 200 and non-JSON bodies are not re-encoded.
        return Response(
            content=response.content,
            status_code=response.status_code,
            media_type=response.headers.get('content-type')
        )

    except httpx.TimeoutException:
        logger.error(f"Timeout calling Modal service {service_name} at {endpoint}")
        raise HTTPException(
            status_code=504,
            detail=f"Modal service '{service_name}' timeout"
        )
    except httpx.RequestError as e:
        logger.error(f"Request error calling Modal service {service_name}: {e}")
        raise HTTPException(
            status_code=503,
            detail=f"Modal service '{service_name}' unavailable: {str(e)}"
        )
    except Exception as e:
        logger.error(f"Unexpected error calling Modal service {service_name}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal error: {str(e)}"
        )
164
+
165
+
166
@app.get("/modal/{service_name}/{endpoint:path}")
async def proxy_modal_service_get(
    service_name: str,
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key),
):
    """GET variant of the generic proxy route; delegates to proxy_modal_service."""
    proxied = await proxy_modal_service(
        service_name=service_name,
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return proxied
175
+
176
+
177
+ # Convenience endpoints for common services
178
@app.post("/vision/{endpoint:path}")
async def vision_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key),
):
    """Shortcut route: forward the request to the "vision" Modal service."""
    proxied = await proxy_modal_service(
        service_name="vision",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return proxied
186
+
187
+
188
@app.post("/audio/{endpoint:path}")
async def audio_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key),
):
    """Shortcut route: forward the request to the "audio" Modal service."""
    proxied = await proxy_modal_service(
        service_name="audio",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return proxied
196
+
197
+
198
@app.post("/embedding/{endpoint:path}")
async def embedding_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key),
):
    """Shortcut route: forward the request to the "embedding" Modal service."""
    proxied = await proxy_modal_service(
        service_name="embedding",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return proxied
206
+
207
+
208
@app.post("/image-gen/{endpoint:path}")
async def image_gen_service(
    endpoint: str,
    request: Request,
    api_key: str = Depends(verify_api_key),
):
    """Shortcut route: forward the request to the "image_gen" Modal service."""
    proxied = await proxy_modal_service(
        service_name="image_gen",
        endpoint=endpoint,
        request=request,
        api_key=api_key,
    )
    return proxied
216
+
217
+
218
+ # Error handlers
219
@app.exception_handler(404)
async def not_found_handler(request: Request, exc: HTTPException):
    """Render every 404 as a JSON body that lists the available endpoints.

    Exception handlers must return a response object; a plain dict cannot be
    sent and turns the 404 into a server error. The handler also receives the
    404s raised explicitly by the routes, so a specific ``detail`` carried by
    the exception is kept instead of being replaced by the generic text.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.responses import JSONResponse

    detail = getattr(exc, "detail", None)
    # "Not Found" is the stock phrase used for unmatched routes; prefer the
    # more descriptive message in that case.
    if not detail or detail == "Not Found":
        detail = "The requested endpoint was not found"

    return JSONResponse(
        status_code=404,
        content={
            "error": "Not Found",
            "detail": detail,
            "available_endpoints": [
                "/health",
                "/services",
                "/modal/{service_name}/{endpoint}",
                "/vision/{endpoint}",
                "/audio/{endpoint}",
                "/embedding/{endpoint}",
                "/image-gen/{endpoint}"
            ]
        }
    )
234
+
235
+
236
if __name__ == "__main__":
    # PORT and WORKERS come from the environment, defaulting to 8082 / 1.
    port = int(os.getenv("PORT", "8082"))
    workers = int(os.getenv("WORKERS", "1"))

    logger.info(f"Starting Modal Proxy Server on port {port}")
    # Only services with a non-empty URL are actually configured.
    configured_services = [name for name, url in MODAL_SERVICES.items() if url]
    logger.info(f"Configured Modal services: {configured_services}")

    if workers > 1:
        # uvicorn can only start multiple workers from an import string,
        # because each worker process re-imports the application.
        # __spec__ is set when started with `python -m`; otherwise fall back
        # to the script's own file name.
        if __spec__ is not None:
            module_name = __spec__.name
        else:
            module_name = os.path.splitext(os.path.basename(__file__))[0]
        app_target = f"{module_name}:app"
    else:
        app_target = app

    uvicorn.run(
        app_target,
        host="0.0.0.0",
        port=port,
        workers=workers,
        log_level="info"
    )