isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/core/services/intelligent_model_selector.py CHANGED
@@ -13,7 +13,7 @@ from typing import Dict, List, Any, Optional, Tuple

 logger = logging.getLogger(__name__)

-from ..database.supabase_client import get_supabase_client
+from ..database.supabase_client import get_supabase_client, get_supabase_table
 from ...inference.ai_factory import AIFactory


@@ -32,16 +32,45 @@ class IntelligentModelSelector:
         self.config = config or {}
         self.supabase_client = None
         self.embedding_service = None
+        self.nlp = None  # spaCy NLP model
         self.models_metadata: Dict[str, Dict[str, Any]] = {}

-        # Default models for each service type
+        # Default models for each service type (updated to GPT-5-nano for best cost/performance)
         self.default_models = {
-            "vision": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "vision": {"model_id": "gpt-5-mini", "provider": "openai"},
             "audio": {"model_id": "whisper-1", "provider": "openai"},
-            "text": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "text": {"model_id": "gpt-5-nano", "provider": "openai"},  # Primary: 50% cheaper than gpt-4.1-nano
             "image": {"model_id": "black-forest-labs/flux-schnell", "provider": "replicate"},
             "embedding": {"model_id": "text-embedding-3-small", "provider": "openai"},
-            "omni": {"model_id": "gpt-4.1", "provider": "openai"}
+            "omni": {"model_id": "gpt-5", "provider": "openai"}
+        }
+
+        # Rate limit fallback: same models with different providers
+        self.rate_limit_fallbacks = {
+            "text": {"model_id": "gpt-5-nano", "provider": "yyds"},  # Same model, yyds provider
+            "vision": {"model_id": "gpt-5-mini", "provider": "yyds"},
+            "omni": {"model_id": "gpt-5", "provider": "yyds"}
+        }
+
+        # Entity-based model mappings
+        self.entity_model_mappings = {
+            # Domain-specific mappings
+            "medical": {"preferred_models": ["microsoft/BioGPT", "medalpaca/medalpaca-7b"]},
+            "legal": {"preferred_models": ["saul-7b", "legal-bert"]},
+            "financial": {"preferred_models": ["ProsusAI/finbert", "financialbert"]},
+            "scientific": {"preferred_models": ["microsoft/DialoGPT-medium", "allenai/scibert"]},
+            "code": {"preferred_models": ["microsoft/CodeBERT", "codeparrot/codeparrot"]},
+
+            # Task-specific mappings
+            "translation": {"preferred_models": ["facebook/m2m100", "google/mt5"]},
+            "summarization": {"preferred_models": ["facebook/bart-large", "google/pegasus"]},
+            "question_answering": {"preferred_models": ["deepset/roberta-base-squad2", "distilbert-base-uncased-distilled-squad"]},
+            "sentiment": {"preferred_models": ["cardiffnlp/twitter-roberta-base-sentiment", "nlptown/bert-base-multilingual-uncased-sentiment"]},
+
+            # Language-specific mappings
+            "chinese": {"preferred_models": ["THUDM/chatglm2-6b", "baichuan-inc/Baichuan2-7B-Chat"]},
+            "japanese": {"preferred_models": ["rinna/japanese-gpt2-medium", "sonoisa/sentence-bert-base-ja-mean-tokens"]},
+            "multilingual": {"preferred_models": ["facebook/mbart-large-50", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"]}
         }

         logger.info("Intelligent Model Selector initialized")
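The three tables added above layer together: entity_model_mappings narrows candidates by detected domain, task, or language; default_models supplies the per-service default; and rate_limit_fallbacks re-routes the same model through the yyds provider when OpenAI throttles. A minimal sketch of that lookup order, assuming only the three attributes shown in this hunk (the resolve_model helper and its arguments are illustrative, not part of the package):

    from typing import Any, Dict, Optional

    # Illustrative only: mirrors the precedence implied by the new tables.
    def resolve_model(selector, service_type: str, domain: Optional[str] = None,
                      rate_limited: bool = False) -> Dict[str, Any]:
        if domain and domain in selector.entity_model_mappings:
            # Domain hit: take the first preferred model for that domain.
            return {"model_id": selector.entity_model_mappings[domain]["preferred_models"][0]}
        if rate_limited and service_type in selector.rate_limit_fallbacks:
            return selector.rate_limit_fallbacks[service_type]   # e.g. {"model_id": "gpt-5-nano", "provider": "yyds"}
        return selector.default_models[service_type]             # e.g. {"model_id": "gpt-5-nano", "provider": "openai"}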
@@ -56,6 +85,9 @@ class IntelligentModelSelector:
         # Initialize embedding service
         await self._init_embedding_service()

+        # Initialize spaCy NLP
+        await self._init_spacy_nlp()
+
         # Load models from database
         await self._load_models_from_database()

@@ -74,11 +106,194 @@ class IntelligentModelSelector:
         except Exception as e:
             logger.warning(f"Failed to initialize embedding service: {e}")

+    async def _init_spacy_nlp(self):
+        """Initialize spaCy NLP model for entity extraction"""
+        try:
+            import spacy
+
+            # Try to load the English model
+            models_to_try = ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"]
+
+            for model_name in models_to_try:
+                try:
+                    self.nlp = spacy.load(model_name)
+                    logger.info(f"spaCy model '{model_name}' loaded successfully")
+                    break
+                except OSError:
+                    logger.warning(f"spaCy model '{model_name}' not found")
+                    continue
+
+            if not self.nlp:
+                # Try to download the smallest model automatically
+                try:
+                    import subprocess
+                    result = subprocess.run(
+                        ["python", "-m", "spacy", "download", "en_core_web_sm"],
+                        capture_output=True,
+                        text=True,
+                        timeout=300  # 5 minutes timeout
+                    )
+
+                    if result.returncode == 0:
+                        self.nlp = spacy.load("en_core_web_sm")
+                        logger.info("spaCy en_core_web_sm downloaded and loaded successfully")
+                    else:
+                        logger.warning(f"Failed to download spaCy model: {result.stderr}")
+
+                except Exception as download_error:
+                    logger.warning(f"Failed to download spaCy model: {download_error}")
+
+            # If still no model, create a blank model with NER
+            if not self.nlp:
+                logger.warning("No spaCy model available, creating blank model with NER")
+                self.nlp = spacy.blank("en")
+                # Add basic NER component
+                try:
+                    self.nlp.add_pipe("ner")
+                except:
+                    pass  # NER might not be available in blank model
+
+        except ImportError:
+            # spaCy not available, will use regex fallback
+            self.nlp = None
+        except Exception as e:
+            # Failed to init spaCy, will use regex fallback
+            self.nlp = None
+
+    def extract_entities_and_keywords(self, text: str) -> Dict[str, Any]:
+        """Extract entities and keywords from text using spaCy and heuristics"""
+        try:
+            entities_info = {
+                "domains": set(),
+                "tasks": set(),
+                "languages": set(),
+                "technical_terms": set(),
+                "named_entities": {},
+                "keywords": set()
+            }
+
+            # Use spaCy if available
+            if self.nlp:
+                try:
+                    doc = self.nlp(text)
+
+                    # Extract named entities
+                    for ent in doc.ents:
+                        label = ent.label_
+                        if label not in entities_info["named_entities"]:
+                            entities_info["named_entities"][label] = []
+                        entities_info["named_entities"][label].append(ent.text)
+
+                    # Extract noun phrases as potential keywords
+                    for chunk in doc.noun_chunks:
+                        entities_info["keywords"].add(chunk.text.lower())
+
+                except Exception as e:
+                    logger.warning(f"spaCy processing failed: {e}")
+
+            # Heuristic-based extraction
+            text_lower = text.lower()
+
+            # Domain detection
+            domain_keywords = {
+                "medical": ["medical", "health", "doctor", "patient", "diagnosis", "treatment", "clinical", "healthcare", "hospital", "medicine", "drug"],
+                "legal": ["legal", "law", "court", "lawyer", "attorney", "contract", "litigation", "compliance", "regulation", "statute"],
+                "financial": ["financial", "finance", "banking", "investment", "trading", "market", "stock", "currency", "economic", "accounting"],
+                "scientific": ["scientific", "research", "study", "experiment", "analysis", "data", "hypothesis", "theory", "academic", "journal"],
+                "code": ["code", "programming", "software", "development", "algorithm", "function", "variable", "debug", "compile", "syntax"],
+                "educational": ["education", "learning", "teaching", "student", "school", "university", "course", "lesson", "curriculum"]
+            }
+
+            # Task detection
+            task_keywords = {
+                "translation": ["translate", "translation", "language", "multilingual", "convert"],
+                "summarization": ["summarize", "summary", "summarization", "abstract", "brief", "condense"],
+                "question_answering": ["question", "answer", "qa", "ask", "respond", "query"],
+                "sentiment": ["sentiment", "emotion", "feeling", "opinion", "positive", "negative", "mood"],
+                "classification": ["classify", "classification", "category", "categorize", "label", "predict"],
+                "generation": ["generate", "creation", "create", "produce", "write", "compose"]
+            }
+
+            # Language detection
+            language_keywords = {
+                "chinese": ["chinese", "mandarin", "cantonese", "zh", "中文", "汉语"],
+                "japanese": ["japanese", "日本語", "ja", "nihongo"],
+                "spanish": ["spanish", "español", "es", "castellano"],
+                "french": ["french", "français", "fr", "francais"],
+                "german": ["german", "deutsch", "de", "german"],
+                "multilingual": ["multilingual", "multiple languages", "multi-language", "cross-lingual"]
+            }
+
+            # Extract domains
+            for domain, keywords in domain_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["domains"].add(domain)
+
+            # Extract tasks
+            for task, keywords in task_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["tasks"].add(task)
+
+            # Extract languages
+            for lang, keywords in language_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["languages"].add(lang)
+
+            # Extract technical terms and model names
+            import re
+
+            # Add specific model name patterns
+            model_patterns = [
+                r'\bgpt-[0-9]+\.?[0-9]*-?\w*\b',  # GPT models (gpt-4, gpt-5-nano, etc)
+                r'\bclaude-[0-9]+\.?[0-9]*-?\w*\b',  # Claude models
+                r'\bllama-?\d*-?\w*\b',  # Llama models
+                r'\bgemini-?\w*\b',  # Gemini models
+                r'\bmistral-?\w*\b',  # Mistral models
+            ]
+
+            for pattern in model_patterns:
+                matches = re.findall(pattern, text_lower)
+                entities_info["technical_terms"].update(matches)
+                entities_info["keywords"].update(matches)
+
+            # General technical patterns
+            tech_patterns = [
+                r'\b\w*bert\w*\b',  # BERT variants
+                r'\b\w*gpt\w*\b',  # GPT variants
+                r'\b\w*llm\w*\b',  # LLM variants
+                r'\b\w*ai\w*\b',  # AI-related
+                r'\b\w*ml\w*\b',  # ML-related
+                r'\b\w*neural\w*\b',  # Neural networks
+            ]
+
+            for pattern in tech_patterns:
+                matches = re.findall(pattern, text_lower, re.IGNORECASE)
+                for match in matches:
+                    if len(match) > 2:  # Filter out too short matches
+                        entities_info["technical_terms"].add(match)
+
+            # Convert sets to lists for JSON serialization
+            return {
+                key: list(value) if isinstance(value, set) else value
+                for key, value in entities_info.items()
+            }
+
+        except Exception as e:
+            logger.error(f"Entity extraction failed: {e}")
+            return {
+                "domains": [],
+                "tasks": [],
+                "languages": [],
+                "technical_terms": [],
+                "named_entities": {},
+                "keywords": []
+            }
+
     async def _load_models_from_database(self):
         """Load models from database registry"""
         try:
             # Get all models from database
-            result = self.supabase_client.table('models').select('*').execute()
+            result = get_supabase_table('models').select('*').execute()
             models = result.data

             logger.info(f"Found {len(models)} models in database registry")
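extract_entities_and_keywords always returns the same six keys, with the internal sets converted to lists for JSON serialization. A short usage sketch follows; the no-argument construction is an assumption, since the diff shows method bodies but not the full constructor contract:

    # Assumes IntelligentModelSelector() accepts a default (None) config.
    selector = IntelligentModelSelector()
    info = selector.extract_entities_and_keywords(
        "Summarize this clinical trial report and translate it to Chinese"
    )
    # Always present: domains, tasks, languages, technical_terms, named_entities, keywords.
    # With the keyword tables above, this text yields (list order not guaranteed):
    #   info["domains"]   contains "medical"          ("clinical" matches the medical list)
    #   info["tasks"]     contains "summarization" and "translation"
    #   info["languages"] contains "chinese"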
@@ -105,7 +320,7 @@ class IntelligentModelSelector:
             }

             # Check embeddings status
-            embeddings_result = self.supabase_client.table('model_embeddings').select('model_id').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('model_id').execute()
             existing_embeddings = {row['model_id'] for row in embeddings_result.data}

             logger.info(f"Found {len(existing_embeddings)} model embeddings")
@@ -128,7 +343,7 @@ class IntelligentModelSelector:
         context: Optional[Dict[str, Any]] = None
     ) -> Dict[str, Any]:
         """
-        Select best model using similarity matching
+        Select best model using entity extraction and similarity matching

         Args:
             request: User's request/query
@@ -139,9 +354,115 @@ class IntelligentModelSelector:
             Selection result with model info and reasoning
         """
         try:
-            # Get embedding for user request
+            # Extract entities and keywords from the request
+            entities_info = self.extract_entities_and_keywords(request)
+            logger.debug(f"Extracted entities: {entities_info}")
+
+            # Try entity-based selection first
+            entity_based_result = await self._select_model_by_entities(entities_info, service_type, request)
+            if entity_based_result and entity_based_result.get("success"):
+                return entity_based_result
+
+            # Fallback to similarity-based selection
+            similarity_result = await self._select_model_by_similarity(request, service_type, entities_info)
+            if similarity_result and similarity_result.get("success"):
+                return similarity_result
+
+            # Final fallback to default
+            return self._get_default_selection(service_type, "No suitable models found after entity and similarity matching")
+
+        except Exception as e:
+            logger.error(f"Model selection failed: {e}")
+            return self._get_default_selection(service_type, f"Selection error: {e}")
+
+    async def _select_model_by_entities(
+        self,
+        entities_info: Dict[str, Any],
+        service_type: str,
+        request: str
+    ) -> Optional[Dict[str, Any]]:
+        """Select model based on extracted entities"""
+        try:
+            reasoning_parts = []
+            candidate_models = []
+
+            # Check for domain-specific models
+            for domain in entities_info.get("domains", []):
+                if domain in self.entity_model_mappings:
+                    models = self.entity_model_mappings[domain]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"domain: {domain}")
+
+            # Check for task-specific models
+            for task in entities_info.get("tasks", []):
+                if task in self.entity_model_mappings:
+                    models = self.entity_model_mappings[task]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"task: {task}")
+
+            # Check for language-specific models
+            for lang in entities_info.get("languages", []):
+                if lang in self.entity_model_mappings:
+                    models = self.entity_model_mappings[lang]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"language: {lang}")
+
+            if not candidate_models:
+                return None
+
+            # Remove duplicates while preserving order
+            unique_candidates = list(dict.fromkeys(candidate_models))
+
+            # Check which models are actually available in our database
+            available_models = []
+            for model_id in unique_candidates:
+                if model_id in self.models_metadata:
+                    model_info = self.models_metadata[model_id]
+                    model_type = model_info.get('model_type')
+
+                    # Filter by service type compatibility
+                    if model_type == service_type or model_type == 'omni':
+                        available_models.append({
+                            "model_id": model_id,
+                            "provider": model_info.get('provider', 'unknown'),
+                            "model_type": model_type,
+                            "entity_match_score": 1.0  # High score for entity matches
+                        })
+
+            if not available_models:
+                logger.debug(f"No entity-based models available for {unique_candidates}")
+                return None
+
+            # Return the first available model (highest priority)
+            selected_model = available_models[0]
+
+            return {
+                "success": True,
+                "selected_model": {
+                    "model_id": selected_model["model_id"],
+                    "provider": selected_model["provider"]
+                },
+                "selection_reason": f"Entity-based match ({', '.join(reasoning_parts)})",
+                "alternatives": available_models[1:3],
+                "entity_match_score": selected_model["entity_match_score"],
+                "entities_detected": entities_info,
+                "method": "entity_based"
+            }
+
+        except Exception as e:
+            logger.error(f"Entity-based model selection failed: {e}")
+            return None
+
+    async def _select_model_by_similarity(
+        self,
+        request: str,
+        service_type: str,
+        entities_info: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Select model using embedding similarity (enhanced with entity info)"""
+        try:
             if not self.embedding_service:
-                return self._get_default_selection(service_type, "No embedding service available")
+                return None

             request_embedding = await self.embedding_service.create_text_embedding(request)

@@ -149,10 +470,39 @@ class IntelligentModelSelector:
             candidates = await self._find_similar_models_supabase(request_embedding, service_type)

             if not candidates:
-                return self._get_default_selection(service_type, "No suitable models found")
+                return None

-            # Return best match
-            best_match = candidates[0]
+            # Boost scores for models that match extracted entities
+            enhanced_candidates = []
+            for candidate in candidates:
+                model_id = candidate["model_id"]
+                base_score = candidate["similarity"]
+
+                # Apply entity-based boosting
+                entity_boost = 0.0
+
+                # Boost based on domain match
+                for domain in entities_info.get("domains", []):
+                    if domain in model_id.lower() or any(domain in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Boost based on task match
+                for task in entities_info.get("tasks", []):
+                    if task in model_id.lower() or any(task in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Apply boost
+                enhanced_score = min(base_score + entity_boost, 1.0)
+
+                enhanced_candidate = candidate.copy()
+                enhanced_candidate["similarity"] = enhanced_score
+                enhanced_candidate["entity_boost"] = entity_boost
+                enhanced_candidates.append(enhanced_candidate)
+
+            # Re-sort by enhanced similarity
+            enhanced_candidates.sort(key=lambda x: x["similarity"], reverse=True)
+
+            best_match = enhanced_candidates[0]

             return {
                 "success": True,
@@ -160,14 +510,16 @@
                     "model_id": best_match["model_id"],
                     "provider": best_match["provider"]
                 },
-                "selection_reason": f"Best similarity match (score: {best_match['similarity']:.3f})",
-                "alternatives": candidates[1:3],  # Top 2 alternatives
-                "similarity_score": best_match["similarity"]
+                "selection_reason": f"Enhanced similarity match (base: {best_match['similarity']:.3f}, entity boost: {best_match.get('entity_boost', 0):.3f})",
+                "alternatives": enhanced_candidates[1:3],
+                "similarity_score": best_match["similarity"],
+                "entities_detected": entities_info,
+                "method": "enhanced_similarity"
             }

         except Exception as e:
-            logger.error(f"Model selection failed: {e}")
-            return self._get_default_selection(service_type, f"Selection error: {e}")
+            logger.error(f"Similarity-based model selection failed: {e}")
+            return None

     async def _find_similar_models_supabase(
         self,
@@ -177,7 +529,7 @@ class IntelligentModelSelector:
         """Find similar models using Supabase and embedding service similarity"""
         try:
             # Get all model embeddings from database
-            embeddings_result = self.supabase_client.table('model_embeddings').select('*').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('*').execute()
             model_embeddings = embeddings_result.data

             if not model_embeddings:
@@ -240,15 +592,41 @@ class IntelligentModelSelector:
             "similarity_score": 0.0
         }

+    def get_rate_limit_fallback(self, service_type: str, original_provider: str = "openai") -> Dict[str, Any]:
+        """
+        Get fallback model when hitting rate limits
+
+        Args:
+            service_type: Type of service (text, vision, etc.)
+            original_provider: The provider that hit rate limit
+
+        Returns:
+            Fallback model selection result
+        """
+        if original_provider == "openai" and service_type in self.rate_limit_fallbacks:
+            fallback = self.rate_limit_fallbacks[service_type]
+
+            return {
+                "success": True,
+                "selected_model": fallback,
+                "selection_reason": f"Rate limit fallback from {original_provider} to {fallback['provider']}",
+                "alternatives": [],
+                "is_fallback": True,
+                "original_provider": original_provider
+            }
+
+        # If no specific fallback, return default
+        return self._get_default_selection(service_type, f"No fallback available for {original_provider}")
+
     async def get_available_models(self, service_type: Optional[str] = None) -> List[Dict[str, Any]]:
         """Get list of available models"""
         try:
             if service_type:
                 # Filter by service type
-                query = self.supabase_client.table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
+                query = get_supabase_table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
             else:
                 # Get all models
-                query = self.supabase_client.table('models').select('*')
+                query = get_supabase_table('models').select('*')

             result = query.order('model_id').execute()
             return result.data
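With these hunks in place, selection proceeds entity match first, then entity-boosted embedding similarity, then the static default, and get_rate_limit_fallback offers a synchronous provider swap for rate-limit errors. A usage sketch; the initialize() and select_model() names and the no-argument constructor are assumptions, since the diff shows their bodies but not every signature:

    import asyncio

    async def demo() -> None:
        selector = IntelligentModelSelector()       # assumed: default construction
        await selector.initialize()                 # assumed name of the async setup entry point
        choice = await selector.select_model(       # assumed name of the coroutine whose body appears above
            "Summarize this legal contract", service_type="text"
        )
        print(choice["selected_model"], choice["selection_reason"])

        # On an OpenAI rate limit, callers can swap provider for the same model:
        fallback = selector.get_rate_limit_fallback("text", original_provider="openai")
        print(fallback["selected_model"])           # {"model_id": "gpt-5-nano", "provider": "yyds"}

    asyncio.run(demo())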
isa_model/core/types.py CHANGED
@@ -188,6 +188,7 @@ class Provider(str, Enum):
     ANTHROPIC = "anthropic"
     GOOGLE = "google"
     YYDS = "yyds"
+    CEREBRAS = "cerebras"
     MODAL = "modal"

 # ===== DATA CLASSES =====
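Because Provider subclasses str, the new member compares equal to its literal value, so existing string-based provider checks keep working:

    from isa_model.core.types import Provider

    assert Provider.CEREBRAS == "cerebras"            # str-backed enum: equal to its value
    assert Provider("cerebras") is Provider.CEREBRAS  # lookup by value returns the member
    print([p.value for p in (Provider.YYDS, Provider.CEREBRAS, Provider.MODAL)])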
isa_model/deployment/__init__.py CHANGED
@@ -1,54 +1,11 @@
 """
-ISA Model Deployment Module
+ISA Model Deployment - Multi-provider deployment system

-Provides comprehensive deployment capabilities for AI models including:
-- Multi-cloud deployment (RunPod, AWS, GCP, Azure)
-- Multiple inference engines (Triton, vLLM, TensorRT-LLM)
-- Model optimization and containerization
-- Deployment monitoring and management
-
-Main Components:
-- DeploymentManager: Orchestrates complete deployment workflow
-- DeploymentConfig: Configuration classes for different deployment scenarios
-- Cloud providers: RunPod, AWS, GCP, Azure integrations
-- Inference engines: Triton, vLLM, TensorRT-LLM support
+Unified deployment architecture supporting Modal and Triton platforms.
 """

+from .modal.deployer import ModalDeployer
+from .triton.provider import TritonProvider
 from .core.deployment_manager import DeploymentManager
-from .core.deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .services import AutoDeployVisionService
-
-__all__ = [
-    # Main classes
-    "DeploymentManager",
-    "DeploymentConfig",
-    "AutoDeployVisionService",
-
-    # Configuration classes
-    "ModelConfig",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-
-    # Enums
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelFormat",
-
-    # Helper functions
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]

-# Version info
-__version__ = "0.1.0"
-__author__ = "ISA Model Team"
+__all__ = ["ModalDeployer", "TritonProvider", "DeploymentManager"]
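The package root now re-exports exactly three names, so the RunPod and deployment_config helpers that 0.4.0 exposed here are gone and downstream imports reduce to the following (constructor signatures are not part of this diff, so none are shown):

    import isa_model.deployment as deployment

    print(deployment.__all__)   # ['ModalDeployer', 'TritonProvider', 'DeploymentManager']

    from isa_model.deployment import DeploymentManager, ModalDeployer, TritonProvider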
isa_model/deployment/core/__init__.py CHANGED
@@ -1,34 +1,5 @@
-"""
-Deployment Core Module
-
-Contains the core deployment functionality including configuration management
-and deployment orchestration.
-"""
+"""Core deployment functionality"""

 from .deployment_manager import DeploymentManager
-from .deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .isa_deployment_service import ISADeploymentService

-__all__ = [
-    "DeploymentManager",
-    "DeploymentConfig",
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelConfig",
-    "ModelFormat",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-    "ISADeploymentService",
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]
+__all__ = ["DeploymentManager"]