isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/core/services/intelligent_model_selector.py CHANGED
@@ -13,7 +13,7 @@ from typing import Dict, List, Any, Optional, Tuple
 
 logger = logging.getLogger(__name__)
 
-from ..database.supabase_client import get_supabase_client
+from ..database.supabase_client import get_supabase_client, get_supabase_table
 from ...inference.ai_factory import AIFactory
 
 
@@ -32,16 +32,45 @@ class IntelligentModelSelector:
         self.config = config or {}
         self.supabase_client = None
         self.embedding_service = None
+        self.nlp = None  # spaCy NLP model
         self.models_metadata: Dict[str, Dict[str, Any]] = {}
 
-        # Default models for each service type
+        # Default models for each service type (updated to GPT-5-nano for best cost/performance)
         self.default_models = {
-            "vision": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "vision": {"model_id": "gpt-5-mini", "provider": "openai"},
             "audio": {"model_id": "whisper-1", "provider": "openai"},
-            "text": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "text": {"model_id": "gpt-5-nano", "provider": "openai"},  # Primary: 50% cheaper than gpt-4.1-nano
             "image": {"model_id": "black-forest-labs/flux-schnell", "provider": "replicate"},
             "embedding": {"model_id": "text-embedding-3-small", "provider": "openai"},
-            "omni": {"model_id": "gpt-4.1", "provider": "openai"}
+            "omni": {"model_id": "gpt-5", "provider": "openai"}
+        }
+
+        # Rate limit fallback: same models with different providers
+        self.rate_limit_fallbacks = {
+            "text": {"model_id": "gpt-5-nano", "provider": "yyds"},  # Same model, yyds provider
+            "vision": {"model_id": "gpt-5-mini", "provider": "yyds"},
+            "omni": {"model_id": "gpt-5", "provider": "yyds"}
+        }
+
+        # Entity-based model mappings
+        self.entity_model_mappings = {
+            # Domain-specific mappings
+            "medical": {"preferred_models": ["microsoft/BioGPT", "medalpaca/medalpaca-7b"]},
+            "legal": {"preferred_models": ["saul-7b", "legal-bert"]},
+            "financial": {"preferred_models": ["ProsusAI/finbert", "financialbert"]},
+            "scientific": {"preferred_models": ["microsoft/DialoGPT-medium", "allenai/scibert"]},
+            "code": {"preferred_models": ["microsoft/CodeBERT", "codeparrot/codeparrot"]},
+
+            # Task-specific mappings
+            "translation": {"preferred_models": ["facebook/m2m100", "google/mt5"]},
+            "summarization": {"preferred_models": ["facebook/bart-large", "google/pegasus"]},
+            "question_answering": {"preferred_models": ["deepset/roberta-base-squad2", "distilbert-base-uncased-distilled-squad"]},
+            "sentiment": {"preferred_models": ["cardiffnlp/twitter-roberta-base-sentiment", "nlptown/bert-base-multilingual-uncased-sentiment"]},
+
+            # Language-specific mappings
+            "chinese": {"preferred_models": ["THUDM/chatglm2-6b", "baichuan-inc/Baichuan2-7B-Chat"]},
+            "japanese": {"preferred_models": ["rinna/japanese-gpt2-medium", "sonoisa/sentence-bert-base-ja-mean-tokens"]},
+            "multilingual": {"preferred_models": ["facebook/mbart-large-50", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"]}
         }
 
         logger.info("Intelligent Model Selector initialized")
@@ -56,6 +85,9 @@ class IntelligentModelSelector:
         # Initialize embedding service
         await self._init_embedding_service()
 
+        # Initialize spaCy NLP
+        await self._init_spacy_nlp()
+
         # Load models from database
         await self._load_models_from_database()
 
@@ -74,11 +106,194 @@ class IntelligentModelSelector:
         except Exception as e:
             logger.warning(f"Failed to initialize embedding service: {e}")
 
+    async def _init_spacy_nlp(self):
+        """Initialize spaCy NLP model for entity extraction"""
+        try:
+            import spacy
+
+            # Try to load the English model
+            models_to_try = ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"]
+
+            for model_name in models_to_try:
+                try:
+                    self.nlp = spacy.load(model_name)
+                    logger.info(f"spaCy model '{model_name}' loaded successfully")
+                    break
+                except OSError:
+                    logger.warning(f"spaCy model '{model_name}' not found")
+                    continue
+
+            if not self.nlp:
+                # Try to download the smallest model automatically
+                try:
+                    import subprocess
+                    result = subprocess.run(
+                        ["python", "-m", "spacy", "download", "en_core_web_sm"],
+                        capture_output=True,
+                        text=True,
+                        timeout=300  # 5 minutes timeout
+                    )
+
+                    if result.returncode == 0:
+                        self.nlp = spacy.load("en_core_web_sm")
+                        logger.info("spaCy en_core_web_sm downloaded and loaded successfully")
+                    else:
+                        logger.warning(f"Failed to download spaCy model: {result.stderr}")
+
+                except Exception as download_error:
+                    logger.warning(f"Failed to download spaCy model: {download_error}")
+
+            # If still no model, create a blank model with NER
+            if not self.nlp:
+                logger.warning("No spaCy model available, creating blank model with NER")
+                self.nlp = spacy.blank("en")
+                # Add basic NER component
+                try:
+                    self.nlp.add_pipe("ner")
+                except:
+                    pass  # NER might not be available in blank model
+
+        except ImportError:
+            # spaCy not available, will use regex fallback
+            self.nlp = None
+        except Exception as e:
+            # Failed to init spaCy, will use regex fallback
+            self.nlp = None
+
+    def extract_entities_and_keywords(self, text: str) -> Dict[str, Any]:
+        """Extract entities and keywords from text using spaCy and heuristics"""
+        try:
+            entities_info = {
+                "domains": set(),
+                "tasks": set(),
+                "languages": set(),
+                "technical_terms": set(),
+                "named_entities": {},
+                "keywords": set()
+            }
+
+            # Use spaCy if available
+            if self.nlp:
+                try:
+                    doc = self.nlp(text)
+
+                    # Extract named entities
+                    for ent in doc.ents:
+                        label = ent.label_
+                        if label not in entities_info["named_entities"]:
+                            entities_info["named_entities"][label] = []
+                        entities_info["named_entities"][label].append(ent.text)
+
+                    # Extract noun phrases as potential keywords
+                    for chunk in doc.noun_chunks:
+                        entities_info["keywords"].add(chunk.text.lower())
+
+                except Exception as e:
+                    logger.warning(f"spaCy processing failed: {e}")
+
+            # Heuristic-based extraction
+            text_lower = text.lower()
+
+            # Domain detection
+            domain_keywords = {
+                "medical": ["medical", "health", "doctor", "patient", "diagnosis", "treatment", "clinical", "healthcare", "hospital", "medicine", "drug"],
+                "legal": ["legal", "law", "court", "lawyer", "attorney", "contract", "litigation", "compliance", "regulation", "statute"],
+                "financial": ["financial", "finance", "banking", "investment", "trading", "market", "stock", "currency", "economic", "accounting"],
+                "scientific": ["scientific", "research", "study", "experiment", "analysis", "data", "hypothesis", "theory", "academic", "journal"],
+                "code": ["code", "programming", "software", "development", "algorithm", "function", "variable", "debug", "compile", "syntax"],
+                "educational": ["education", "learning", "teaching", "student", "school", "university", "course", "lesson", "curriculum"]
+            }
+
+            # Task detection
+            task_keywords = {
+                "translation": ["translate", "translation", "language", "multilingual", "convert"],
+                "summarization": ["summarize", "summary", "summarization", "abstract", "brief", "condense"],
+                "question_answering": ["question", "answer", "qa", "ask", "respond", "query"],
+                "sentiment": ["sentiment", "emotion", "feeling", "opinion", "positive", "negative", "mood"],
+                "classification": ["classify", "classification", "category", "categorize", "label", "predict"],
+                "generation": ["generate", "creation", "create", "produce", "write", "compose"]
+            }
+
+            # Language detection
+            language_keywords = {
+                "chinese": ["chinese", "mandarin", "cantonese", "zh", "中文", "汉语"],
+                "japanese": ["japanese", "日本語", "ja", "nihongo"],
+                "spanish": ["spanish", "español", "es", "castellano"],
+                "french": ["french", "français", "fr", "francais"],
+                "german": ["german", "deutsch", "de", "german"],
+                "multilingual": ["multilingual", "multiple languages", "multi-language", "cross-lingual"]
+            }
+
+            # Extract domains
+            for domain, keywords in domain_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["domains"].add(domain)
+
+            # Extract tasks
+            for task, keywords in task_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["tasks"].add(task)
+
+            # Extract languages
+            for lang, keywords in language_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["languages"].add(lang)
+
+            # Extract technical terms and model names
+            import re
+
+            # Add specific model name patterns
+            model_patterns = [
+                r'\bgpt-[0-9]+\.?[0-9]*-?\w*\b',  # GPT models (gpt-4, gpt-5-nano, etc)
+                r'\bclaude-[0-9]+\.?[0-9]*-?\w*\b',  # Claude models
+                r'\bllama-?\d*-?\w*\b',  # Llama models
+                r'\bgemini-?\w*\b',  # Gemini models
+                r'\bmistral-?\w*\b',  # Mistral models
+            ]
+
+            for pattern in model_patterns:
+                matches = re.findall(pattern, text_lower)
+                entities_info["technical_terms"].update(matches)
+                entities_info["keywords"].update(matches)
+
+            # General technical patterns
+            tech_patterns = [
+                r'\b\w*bert\w*\b',  # BERT variants
+                r'\b\w*gpt\w*\b',  # GPT variants
+                r'\b\w*llm\w*\b',  # LLM variants
+                r'\b\w*ai\w*\b',  # AI-related
+                r'\b\w*ml\w*\b',  # ML-related
+                r'\b\w*neural\w*\b',  # Neural networks
+            ]
+
+            for pattern in tech_patterns:
+                matches = re.findall(pattern, text_lower, re.IGNORECASE)
+                for match in matches:
+                    if len(match) > 2:  # Filter out too short matches
+                        entities_info["technical_terms"].add(match)
+
+            # Convert sets to lists for JSON serialization
+            return {
+                key: list(value) if isinstance(value, set) else value
+                for key, value in entities_info.items()
+            }
+
+        except Exception as e:
+            logger.error(f"Entity extraction failed: {e}")
+            return {
+                "domains": [],
+                "tasks": [],
+                "languages": [],
+                "technical_terms": [],
+                "named_entities": {},
+                "keywords": []
+            }
+
     async def _load_models_from_database(self):
         """Load models from database registry"""
         try:
             # Get all models from database
-            result = self.supabase_client.table('models').select('*').execute()
+            result = get_supabase_table('models').select('*').execute()
             models = result.data
 
             logger.info(f"Found {len(models)} models in database registry")
@@ -105,7 +320,7 @@ class IntelligentModelSelector:
                 }
 
             # Check embeddings status
-            embeddings_result = self.supabase_client.table('model_embeddings').select('model_id').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('model_id').execute()
             existing_embeddings = {row['model_id'] for row in embeddings_result.data}
 
             logger.info(f"Found {len(existing_embeddings)} model embeddings")
@@ -128,7 +343,7 @@ class IntelligentModelSelector:
         context: Optional[Dict[str, Any]] = None
     ) -> Dict[str, Any]:
         """
-        Select best model using similarity matching
+        Select best model using entity extraction and similarity matching
 
         Args:
             request: User's request/query
@@ -139,9 +354,115 @@ class IntelligentModelSelector:
             Selection result with model info and reasoning
         """
         try:
-            # Get embedding for user request
+            # Extract entities and keywords from the request
+            entities_info = self.extract_entities_and_keywords(request)
+            logger.debug(f"Extracted entities: {entities_info}")
+
+            # Try entity-based selection first
+            entity_based_result = await self._select_model_by_entities(entities_info, service_type, request)
+            if entity_based_result and entity_based_result.get("success"):
+                return entity_based_result
+
+            # Fallback to similarity-based selection
+            similarity_result = await self._select_model_by_similarity(request, service_type, entities_info)
+            if similarity_result and similarity_result.get("success"):
+                return similarity_result
+
+            # Final fallback to default
+            return self._get_default_selection(service_type, "No suitable models found after entity and similarity matching")
+
+        except Exception as e:
+            logger.error(f"Model selection failed: {e}")
+            return self._get_default_selection(service_type, f"Selection error: {e}")
+
+    async def _select_model_by_entities(
+        self,
+        entities_info: Dict[str, Any],
+        service_type: str,
+        request: str
+    ) -> Optional[Dict[str, Any]]:
+        """Select model based on extracted entities"""
+        try:
+            reasoning_parts = []
+            candidate_models = []
+
+            # Check for domain-specific models
+            for domain in entities_info.get("domains", []):
+                if domain in self.entity_model_mappings:
+                    models = self.entity_model_mappings[domain]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"domain: {domain}")
+
+            # Check for task-specific models
+            for task in entities_info.get("tasks", []):
+                if task in self.entity_model_mappings:
+                    models = self.entity_model_mappings[task]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"task: {task}")
+
+            # Check for language-specific models
+            for lang in entities_info.get("languages", []):
+                if lang in self.entity_model_mappings:
+                    models = self.entity_model_mappings[lang]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"language: {lang}")
+
+            if not candidate_models:
+                return None
+
+            # Remove duplicates while preserving order
+            unique_candidates = list(dict.fromkeys(candidate_models))
+
+            # Check which models are actually available in our database
+            available_models = []
+            for model_id in unique_candidates:
+                if model_id in self.models_metadata:
+                    model_info = self.models_metadata[model_id]
+                    model_type = model_info.get('model_type')
+
+                    # Filter by service type compatibility
+                    if model_type == service_type or model_type == 'omni':
+                        available_models.append({
+                            "model_id": model_id,
+                            "provider": model_info.get('provider', 'unknown'),
+                            "model_type": model_type,
+                            "entity_match_score": 1.0  # High score for entity matches
+                        })
+
+            if not available_models:
+                logger.debug(f"No entity-based models available for {unique_candidates}")
+                return None
+
+            # Return the first available model (highest priority)
+            selected_model = available_models[0]
+
+            return {
+                "success": True,
+                "selected_model": {
+                    "model_id": selected_model["model_id"],
+                    "provider": selected_model["provider"]
+                },
+                "selection_reason": f"Entity-based match ({', '.join(reasoning_parts)})",
+                "alternatives": available_models[1:3],
+                "entity_match_score": selected_model["entity_match_score"],
+                "entities_detected": entities_info,
+                "method": "entity_based"
+            }
+
+        except Exception as e:
+            logger.error(f"Entity-based model selection failed: {e}")
+            return None
+
+    async def _select_model_by_similarity(
+        self,
+        request: str,
+        service_type: str,
+        entities_info: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Select model using embedding similarity (enhanced with entity info)"""
+        try:
             if not self.embedding_service:
-                return self._get_default_selection(service_type, "No embedding service available")
+                return None
 
             request_embedding = await self.embedding_service.create_text_embedding(request)
 
@@ -149,10 +470,39 @@ class IntelligentModelSelector:
             candidates = await self._find_similar_models_supabase(request_embedding, service_type)
 
             if not candidates:
-                return self._get_default_selection(service_type, "No suitable models found")
+                return None
 
-            # Return best match
-            best_match = candidates[0]
+            # Boost scores for models that match extracted entities
+            enhanced_candidates = []
+            for candidate in candidates:
+                model_id = candidate["model_id"]
+                base_score = candidate["similarity"]
+
+                # Apply entity-based boosting
+                entity_boost = 0.0
+
+                # Boost based on domain match
+                for domain in entities_info.get("domains", []):
+                    if domain in model_id.lower() or any(domain in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Boost based on task match
+                for task in entities_info.get("tasks", []):
+                    if task in model_id.lower() or any(task in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Apply boost
+                enhanced_score = min(base_score + entity_boost, 1.0)
+
+                enhanced_candidate = candidate.copy()
+                enhanced_candidate["similarity"] = enhanced_score
+                enhanced_candidate["entity_boost"] = entity_boost
+                enhanced_candidates.append(enhanced_candidate)
+
+            # Re-sort by enhanced similarity
+            enhanced_candidates.sort(key=lambda x: x["similarity"], reverse=True)
+
+            best_match = enhanced_candidates[0]
 
             return {
                 "success": True,
@@ -160,14 +510,16 @@ class IntelligentModelSelector:
                     "model_id": best_match["model_id"],
                     "provider": best_match["provider"]
                 },
-                "selection_reason": f"Best similarity match (score: {best_match['similarity']:.3f})",
-                "alternatives": candidates[1:3],  # Top 2 alternatives
-                "similarity_score": best_match["similarity"]
+                "selection_reason": f"Enhanced similarity match (base: {best_match['similarity']:.3f}, entity boost: {best_match.get('entity_boost', 0):.3f})",
+                "alternatives": enhanced_candidates[1:3],
+                "similarity_score": best_match["similarity"],
+                "entities_detected": entities_info,
+                "method": "enhanced_similarity"
             }
 
         except Exception as e:
-            logger.error(f"Model selection failed: {e}")
-            return self._get_default_selection(service_type, f"Selection error: {e}")
+            logger.error(f"Similarity-based model selection failed: {e}")
+            return None
 
     async def _find_similar_models_supabase(
         self,
@@ -177,7 +529,7 @@ class IntelligentModelSelector:
         """Find similar models using Supabase and embedding service similarity"""
         try:
             # Get all model embeddings from database
-            embeddings_result = self.supabase_client.table('model_embeddings').select('*').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('*').execute()
             model_embeddings = embeddings_result.data
 
             if not model_embeddings:
@@ -240,15 +592,41 @@ class IntelligentModelSelector:
             "similarity_score": 0.0
         }
 
+    def get_rate_limit_fallback(self, service_type: str, original_provider: str = "openai") -> Dict[str, Any]:
+        """
+        Get fallback model when hitting rate limits
+
+        Args:
+            service_type: Type of service (text, vision, etc.)
+            original_provider: The provider that hit rate limit
+
+        Returns:
+            Fallback model selection result
+        """
+        if original_provider == "openai" and service_type in self.rate_limit_fallbacks:
+            fallback = self.rate_limit_fallbacks[service_type]
+
+            return {
+                "success": True,
+                "selected_model": fallback,
+                "selection_reason": f"Rate limit fallback from {original_provider} to {fallback['provider']}",
+                "alternatives": [],
+                "is_fallback": True,
+                "original_provider": original_provider
+            }
+
+        # If no specific fallback, return default
+        return self._get_default_selection(service_type, f"No fallback available for {original_provider}")
+
     async def get_available_models(self, service_type: Optional[str] = None) -> List[Dict[str, Any]]:
         """Get list of available models"""
         try:
             if service_type:
                 # Filter by service type
-                query = self.supabase_client.table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
+                query = get_supabase_table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
             else:
                 # Get all models
-                query = self.supabase_client.table('models').select('*')
+                query = get_supabase_table('models').select('*')
 
             result = query.order('model_id').execute()
             return result.data
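Taken together with the `rate_limit_fallbacks` table added in `__init__`, the intended caller-side flow appears to be a same-model, different-provider retry. A hedged sketch follows; the helper functions and the retry wrapper are hypothetical, only `get_rate_limit_fallback` and its return shape come from the diff:

try:
    response = await call_openai_text_model(prompt)  # hypothetical helper
except Exception:  # e.g. an HTTP 429 / rate-limit error surfaced by the provider SDK
    fallback = selector.get_rate_limit_fallback("text", original_provider="openai")
    # fallback == {
    #     "success": True,
    #     "selected_model": {"model_id": "gpt-5-nano", "provider": "yyds"},
    #     "selection_reason": "Rate limit fallback from openai to yyds",
    #     "alternatives": [],
    #     "is_fallback": True,
    #     "original_provider": "openai"
    # }
    response = await call_with_provider(fallback["selected_model"], prompt)  # hypothetical helper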
isa_model/core/storage/hf_storage.py CHANGED
@@ -40,7 +40,7 @@ class HuggingFaceStorage(ModelStorage):
 
         storage = HuggingFaceStorage(
             username="xenobordom",
-            token="your_hf_token"
+            token=os.getenv("HF_TOKEN")  # Set in environment
         )
 
         # Save a trained model to HuggingFace Hub
isa_model/core/types.py CHANGED
@@ -188,6 +188,7 @@ class Provider(str, Enum):
     ANTHROPIC = "anthropic"
     GOOGLE = "google"
     YYDS = "yyds"
+    CEREBRAS = "cerebras"
     MODAL = "modal"
 
 # ===== DATA CLASSES =====
isa_model/deployment/__init__.py CHANGED
@@ -1,54 +1,11 @@
 """
-ISA Model Deployment Module
+ISA Model Deployment - Multi-provider deployment system
 
-Provides comprehensive deployment capabilities for AI models including:
-- Multi-cloud deployment (RunPod, AWS, GCP, Azure)
-- Multiple inference engines (Triton, vLLM, TensorRT-LLM)
-- Model optimization and containerization
-- Deployment monitoring and management
-
-Main Components:
-- DeploymentManager: Orchestrates complete deployment workflow
-- DeploymentConfig: Configuration classes for different deployment scenarios
-- Cloud providers: RunPod, AWS, GCP, Azure integrations
-- Inference engines: Triton, vLLM, TensorRT-LLM support
+Unified deployment architecture supporting Modal and Triton platforms.
 """
 
+from .modal.deployer import ModalDeployer
+from .triton.provider import TritonProvider
 from .core.deployment_manager import DeploymentManager
-from .core.deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .services import AutoDeployVisionService
-
-__all__ = [
-    # Main classes
-    "DeploymentManager",
-    "DeploymentConfig",
-    "AutoDeployVisionService",
-
-    # Configuration classes
-    "ModelConfig",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-
-    # Enums
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelFormat",
-
-    # Helper functions
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]
 
-# Version info
-__version__ = "0.1.0"
-__author__ = "ISA Model Team"
+__all__ = ["ModalDeployer", "TritonProvider", "DeploymentManager"]
isa_model/deployment/core/__init__.py CHANGED
@@ -1,34 +1,5 @@
-"""
-Deployment Core Module
-
-Contains the core deployment functionality including configuration management
-and deployment orchestration.
-"""
+"""Core deployment functionality"""
 
 from .deployment_manager import DeploymentManager
-from .deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .isa_deployment_service import ISADeploymentService
 
-__all__ = [
-    "DeploymentManager",
-    "DeploymentConfig",
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelConfig",
-    "ModelFormat",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-    "ISADeploymentService",
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]
+__all__ = ["DeploymentManager"]