isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@ from typing import Dict, List, Any, Optional, Tuple
|
|
13
13
|
|
14
14
|
logger = logging.getLogger(__name__)
|
15
15
|
|
16
|
-
from ..database.supabase_client import get_supabase_client
|
16
|
+
from ..database.supabase_client import get_supabase_client, get_supabase_table
|
17
17
|
from ...inference.ai_factory import AIFactory
|
18
18
|
|
19
19
|
|
@@ -32,16 +32,45 @@ class IntelligentModelSelector:
|
|
32
32
|
self.config = config or {}
|
33
33
|
self.supabase_client = None
|
34
34
|
self.embedding_service = None
|
35
|
+
self.nlp = None # spaCy NLP model
|
35
36
|
self.models_metadata: Dict[str, Dict[str, Any]] = {}
|
36
37
|
|
37
|
-
# Default models for each service type
|
38
|
+
# Default models for each service type (updated to GPT-5-nano for best cost/performance)
|
38
39
|
self.default_models = {
|
39
|
-
"vision": {"model_id": "gpt-
|
40
|
+
"vision": {"model_id": "gpt-5-mini", "provider": "openai"},
|
40
41
|
"audio": {"model_id": "whisper-1", "provider": "openai"},
|
41
|
-
"text": {"model_id": "gpt-
|
42
|
+
"text": {"model_id": "gpt-5-nano", "provider": "openai"}, # Primary: 50% cheaper than gpt-4.1-nano
|
42
43
|
"image": {"model_id": "black-forest-labs/flux-schnell", "provider": "replicate"},
|
43
44
|
"embedding": {"model_id": "text-embedding-3-small", "provider": "openai"},
|
44
|
-
"omni": {"model_id": "gpt-
|
45
|
+
"omni": {"model_id": "gpt-5", "provider": "openai"}
|
46
|
+
}
|
47
|
+
|
48
|
+
# Rate limit fallback: same models with different providers
|
49
|
+
self.rate_limit_fallbacks = {
|
50
|
+
"text": {"model_id": "gpt-5-nano", "provider": "yyds"}, # Same model, yyds provider
|
51
|
+
"vision": {"model_id": "gpt-5-mini", "provider": "yyds"},
|
52
|
+
"omni": {"model_id": "gpt-5", "provider": "yyds"}
|
53
|
+
}
|
54
|
+
|
55
|
+
# Entity-based model mappings
|
56
|
+
self.entity_model_mappings = {
|
57
|
+
# Domain-specific mappings
|
58
|
+
"medical": {"preferred_models": ["microsoft/BioGPT", "medalpaca/medalpaca-7b"]},
|
59
|
+
"legal": {"preferred_models": ["saul-7b", "legal-bert"]},
|
60
|
+
"financial": {"preferred_models": ["ProsusAI/finbert", "financialbert"]},
|
61
|
+
"scientific": {"preferred_models": ["microsoft/DialoGPT-medium", "allenai/scibert"]},
|
62
|
+
"code": {"preferred_models": ["microsoft/CodeBERT", "codeparrot/codeparrot"]},
|
63
|
+
|
64
|
+
# Task-specific mappings
|
65
|
+
"translation": {"preferred_models": ["facebook/m2m100", "google/mt5"]},
|
66
|
+
"summarization": {"preferred_models": ["facebook/bart-large", "google/pegasus"]},
|
67
|
+
"question_answering": {"preferred_models": ["deepset/roberta-base-squad2", "distilbert-base-uncased-distilled-squad"]},
|
68
|
+
"sentiment": {"preferred_models": ["cardiffnlp/twitter-roberta-base-sentiment", "nlptown/bert-base-multilingual-uncased-sentiment"]},
|
69
|
+
|
70
|
+
# Language-specific mappings
|
71
|
+
"chinese": {"preferred_models": ["THUDM/chatglm2-6b", "baichuan-inc/Baichuan2-7B-Chat"]},
|
72
|
+
"japanese": {"preferred_models": ["rinna/japanese-gpt2-medium", "sonoisa/sentence-bert-base-ja-mean-tokens"]},
|
73
|
+
"multilingual": {"preferred_models": ["facebook/mbart-large-50", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"]}
|
45
74
|
}
|
46
75
|
|
47
76
|
logger.info("Intelligent Model Selector initialized")
|
@@ -56,6 +85,9 @@ class IntelligentModelSelector:
|
|
56
85
|
# Initialize embedding service
|
57
86
|
await self._init_embedding_service()
|
58
87
|
|
88
|
+
# Initialize spaCy NLP
|
89
|
+
await self._init_spacy_nlp()
|
90
|
+
|
59
91
|
# Load models from database
|
60
92
|
await self._load_models_from_database()
|
61
93
|
|
@@ -74,11 +106,194 @@ class IntelligentModelSelector:
|
|
74
106
|
except Exception as e:
|
75
107
|
logger.warning(f"Failed to initialize embedding service: {e}")
|
76
108
|
|
109
|
+
async def _init_spacy_nlp(self):
|
110
|
+
"""Initialize spaCy NLP model for entity extraction"""
|
111
|
+
try:
|
112
|
+
import spacy
|
113
|
+
|
114
|
+
# Try to load the English model
|
115
|
+
models_to_try = ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"]
|
116
|
+
|
117
|
+
for model_name in models_to_try:
|
118
|
+
try:
|
119
|
+
self.nlp = spacy.load(model_name)
|
120
|
+
logger.info(f"spaCy model '{model_name}' loaded successfully")
|
121
|
+
break
|
122
|
+
except OSError:
|
123
|
+
logger.warning(f"spaCy model '{model_name}' not found")
|
124
|
+
continue
|
125
|
+
|
126
|
+
if not self.nlp:
|
127
|
+
# Try to download the smallest model automatically
|
128
|
+
try:
|
129
|
+
import subprocess
|
130
|
+
result = subprocess.run(
|
131
|
+
["python", "-m", "spacy", "download", "en_core_web_sm"],
|
132
|
+
capture_output=True,
|
133
|
+
text=True,
|
134
|
+
timeout=300 # 5 minutes timeout
|
135
|
+
)
|
136
|
+
|
137
|
+
if result.returncode == 0:
|
138
|
+
self.nlp = spacy.load("en_core_web_sm")
|
139
|
+
logger.info("spaCy en_core_web_sm downloaded and loaded successfully")
|
140
|
+
else:
|
141
|
+
logger.warning(f"Failed to download spaCy model: {result.stderr}")
|
142
|
+
|
143
|
+
except Exception as download_error:
|
144
|
+
logger.warning(f"Failed to download spaCy model: {download_error}")
|
145
|
+
|
146
|
+
# If still no model, create a blank model with NER
|
147
|
+
if not self.nlp:
|
148
|
+
logger.warning("No spaCy model available, creating blank model with NER")
|
149
|
+
self.nlp = spacy.blank("en")
|
150
|
+
# Add basic NER component
|
151
|
+
try:
|
152
|
+
self.nlp.add_pipe("ner")
|
153
|
+
except:
|
154
|
+
pass # NER might not be available in blank model
|
155
|
+
|
156
|
+
except ImportError:
|
157
|
+
# spaCy not available, will use regex fallback
|
158
|
+
self.nlp = None
|
159
|
+
except Exception as e:
|
160
|
+
# Failed to init spaCy, will use regex fallback
|
161
|
+
self.nlp = None
|
162
|
+
|
163
|
+
def extract_entities_and_keywords(self, text: str) -> Dict[str, Any]:
|
164
|
+
"""Extract entities and keywords from text using spaCy and heuristics"""
|
165
|
+
try:
|
166
|
+
entities_info = {
|
167
|
+
"domains": set(),
|
168
|
+
"tasks": set(),
|
169
|
+
"languages": set(),
|
170
|
+
"technical_terms": set(),
|
171
|
+
"named_entities": {},
|
172
|
+
"keywords": set()
|
173
|
+
}
|
174
|
+
|
175
|
+
# Use spaCy if available
|
176
|
+
if self.nlp:
|
177
|
+
try:
|
178
|
+
doc = self.nlp(text)
|
179
|
+
|
180
|
+
# Extract named entities
|
181
|
+
for ent in doc.ents:
|
182
|
+
label = ent.label_
|
183
|
+
if label not in entities_info["named_entities"]:
|
184
|
+
entities_info["named_entities"][label] = []
|
185
|
+
entities_info["named_entities"][label].append(ent.text)
|
186
|
+
|
187
|
+
# Extract noun phrases as potential keywords
|
188
|
+
for chunk in doc.noun_chunks:
|
189
|
+
entities_info["keywords"].add(chunk.text.lower())
|
190
|
+
|
191
|
+
except Exception as e:
|
192
|
+
logger.warning(f"spaCy processing failed: {e}")
|
193
|
+
|
194
|
+
# Heuristic-based extraction
|
195
|
+
text_lower = text.lower()
|
196
|
+
|
197
|
+
# Domain detection
|
198
|
+
domain_keywords = {
|
199
|
+
"medical": ["medical", "health", "doctor", "patient", "diagnosis", "treatment", "clinical", "healthcare", "hospital", "medicine", "drug"],
|
200
|
+
"legal": ["legal", "law", "court", "lawyer", "attorney", "contract", "litigation", "compliance", "regulation", "statute"],
|
201
|
+
"financial": ["financial", "finance", "banking", "investment", "trading", "market", "stock", "currency", "economic", "accounting"],
|
202
|
+
"scientific": ["scientific", "research", "study", "experiment", "analysis", "data", "hypothesis", "theory", "academic", "journal"],
|
203
|
+
"code": ["code", "programming", "software", "development", "algorithm", "function", "variable", "debug", "compile", "syntax"],
|
204
|
+
"educational": ["education", "learning", "teaching", "student", "school", "university", "course", "lesson", "curriculum"]
|
205
|
+
}
|
206
|
+
|
207
|
+
# Task detection
|
208
|
+
task_keywords = {
|
209
|
+
"translation": ["translate", "translation", "language", "multilingual", "convert"],
|
210
|
+
"summarization": ["summarize", "summary", "summarization", "abstract", "brief", "condense"],
|
211
|
+
"question_answering": ["question", "answer", "qa", "ask", "respond", "query"],
|
212
|
+
"sentiment": ["sentiment", "emotion", "feeling", "opinion", "positive", "negative", "mood"],
|
213
|
+
"classification": ["classify", "classification", "category", "categorize", "label", "predict"],
|
214
|
+
"generation": ["generate", "creation", "create", "produce", "write", "compose"]
|
215
|
+
}
|
216
|
+
|
217
|
+
# Language detection
|
218
|
+
language_keywords = {
|
219
|
+
"chinese": ["chinese", "mandarin", "cantonese", "zh", "中文", "汉语"],
|
220
|
+
"japanese": ["japanese", "日本語", "ja", "nihongo"],
|
221
|
+
"spanish": ["spanish", "español", "es", "castellano"],
|
222
|
+
"french": ["french", "français", "fr", "francais"],
|
223
|
+
"german": ["german", "deutsch", "de", "german"],
|
224
|
+
"multilingual": ["multilingual", "multiple languages", "multi-language", "cross-lingual"]
|
225
|
+
}
|
226
|
+
|
227
|
+
# Extract domains
|
228
|
+
for domain, keywords in domain_keywords.items():
|
229
|
+
if any(keyword in text_lower for keyword in keywords):
|
230
|
+
entities_info["domains"].add(domain)
|
231
|
+
|
232
|
+
# Extract tasks
|
233
|
+
for task, keywords in task_keywords.items():
|
234
|
+
if any(keyword in text_lower for keyword in keywords):
|
235
|
+
entities_info["tasks"].add(task)
|
236
|
+
|
237
|
+
# Extract languages
|
238
|
+
for lang, keywords in language_keywords.items():
|
239
|
+
if any(keyword in text_lower for keyword in keywords):
|
240
|
+
entities_info["languages"].add(lang)
|
241
|
+
|
242
|
+
# Extract technical terms and model names
|
243
|
+
import re
|
244
|
+
|
245
|
+
# Add specific model name patterns
|
246
|
+
model_patterns = [
|
247
|
+
r'\bgpt-[0-9]+\.?[0-9]*-?\w*\b', # GPT models (gpt-4, gpt-5-nano, etc)
|
248
|
+
r'\bclaude-[0-9]+\.?[0-9]*-?\w*\b', # Claude models
|
249
|
+
r'\bllama-?\d*-?\w*\b', # Llama models
|
250
|
+
r'\bgemini-?\w*\b', # Gemini models
|
251
|
+
r'\bmistral-?\w*\b', # Mistral models
|
252
|
+
]
|
253
|
+
|
254
|
+
for pattern in model_patterns:
|
255
|
+
matches = re.findall(pattern, text_lower)
|
256
|
+
entities_info["technical_terms"].update(matches)
|
257
|
+
entities_info["keywords"].update(matches)
|
258
|
+
|
259
|
+
# General technical patterns
|
260
|
+
tech_patterns = [
|
261
|
+
r'\b\w*bert\w*\b', # BERT variants
|
262
|
+
r'\b\w*gpt\w*\b', # GPT variants
|
263
|
+
r'\b\w*llm\w*\b', # LLM variants
|
264
|
+
r'\b\w*ai\w*\b', # AI-related
|
265
|
+
r'\b\w*ml\w*\b', # ML-related
|
266
|
+
r'\b\w*neural\w*\b', # Neural networks
|
267
|
+
]
|
268
|
+
|
269
|
+
for pattern in tech_patterns:
|
270
|
+
matches = re.findall(pattern, text_lower, re.IGNORECASE)
|
271
|
+
for match in matches:
|
272
|
+
if len(match) > 2: # Filter out too short matches
|
273
|
+
entities_info["technical_terms"].add(match)
|
274
|
+
|
275
|
+
# Convert sets to lists for JSON serialization
|
276
|
+
return {
|
277
|
+
key: list(value) if isinstance(value, set) else value
|
278
|
+
for key, value in entities_info.items()
|
279
|
+
}
|
280
|
+
|
281
|
+
except Exception as e:
|
282
|
+
logger.error(f"Entity extraction failed: {e}")
|
283
|
+
return {
|
284
|
+
"domains": [],
|
285
|
+
"tasks": [],
|
286
|
+
"languages": [],
|
287
|
+
"technical_terms": [],
|
288
|
+
"named_entities": {},
|
289
|
+
"keywords": []
|
290
|
+
}
|
291
|
+
|
77
292
|
async def _load_models_from_database(self):
|
78
293
|
"""Load models from database registry"""
|
79
294
|
try:
|
80
295
|
# Get all models from database
|
81
|
-
result =
|
296
|
+
result = get_supabase_table('models').select('*').execute()
|
82
297
|
models = result.data
|
83
298
|
|
84
299
|
logger.info(f"Found {len(models)} models in database registry")
|
@@ -105,7 +320,7 @@ class IntelligentModelSelector:
|
|
105
320
|
}
|
106
321
|
|
107
322
|
# Check embeddings status
|
108
|
-
embeddings_result =
|
323
|
+
embeddings_result = get_supabase_table('model_embeddings').select('model_id').execute()
|
109
324
|
existing_embeddings = {row['model_id'] for row in embeddings_result.data}
|
110
325
|
|
111
326
|
logger.info(f"Found {len(existing_embeddings)} model embeddings")
|
@@ -128,7 +343,7 @@ class IntelligentModelSelector:
|
|
128
343
|
context: Optional[Dict[str, Any]] = None
|
129
344
|
) -> Dict[str, Any]:
|
130
345
|
"""
|
131
|
-
Select best model using similarity matching
|
346
|
+
Select best model using entity extraction and similarity matching
|
132
347
|
|
133
348
|
Args:
|
134
349
|
request: User's request/query
|
@@ -139,9 +354,115 @@ class IntelligentModelSelector:
|
|
139
354
|
Selection result with model info and reasoning
|
140
355
|
"""
|
141
356
|
try:
|
142
|
-
#
|
357
|
+
# Extract entities and keywords from the request
|
358
|
+
entities_info = self.extract_entities_and_keywords(request)
|
359
|
+
logger.debug(f"Extracted entities: {entities_info}")
|
360
|
+
|
361
|
+
# Try entity-based selection first
|
362
|
+
entity_based_result = await self._select_model_by_entities(entities_info, service_type, request)
|
363
|
+
if entity_based_result and entity_based_result.get("success"):
|
364
|
+
return entity_based_result
|
365
|
+
|
366
|
+
# Fallback to similarity-based selection
|
367
|
+
similarity_result = await self._select_model_by_similarity(request, service_type, entities_info)
|
368
|
+
if similarity_result and similarity_result.get("success"):
|
369
|
+
return similarity_result
|
370
|
+
|
371
|
+
# Final fallback to default
|
372
|
+
return self._get_default_selection(service_type, "No suitable models found after entity and similarity matching")
|
373
|
+
|
374
|
+
except Exception as e:
|
375
|
+
logger.error(f"Model selection failed: {e}")
|
376
|
+
return self._get_default_selection(service_type, f"Selection error: {e}")
|
377
|
+
|
378
|
+
async def _select_model_by_entities(
|
379
|
+
self,
|
380
|
+
entities_info: Dict[str, Any],
|
381
|
+
service_type: str,
|
382
|
+
request: str
|
383
|
+
) -> Optional[Dict[str, Any]]:
|
384
|
+
"""Select model based on extracted entities"""
|
385
|
+
try:
|
386
|
+
reasoning_parts = []
|
387
|
+
candidate_models = []
|
388
|
+
|
389
|
+
# Check for domain-specific models
|
390
|
+
for domain in entities_info.get("domains", []):
|
391
|
+
if domain in self.entity_model_mappings:
|
392
|
+
models = self.entity_model_mappings[domain]["preferred_models"]
|
393
|
+
candidate_models.extend(models)
|
394
|
+
reasoning_parts.append(f"domain: {domain}")
|
395
|
+
|
396
|
+
# Check for task-specific models
|
397
|
+
for task in entities_info.get("tasks", []):
|
398
|
+
if task in self.entity_model_mappings:
|
399
|
+
models = self.entity_model_mappings[task]["preferred_models"]
|
400
|
+
candidate_models.extend(models)
|
401
|
+
reasoning_parts.append(f"task: {task}")
|
402
|
+
|
403
|
+
# Check for language-specific models
|
404
|
+
for lang in entities_info.get("languages", []):
|
405
|
+
if lang in self.entity_model_mappings:
|
406
|
+
models = self.entity_model_mappings[lang]["preferred_models"]
|
407
|
+
candidate_models.extend(models)
|
408
|
+
reasoning_parts.append(f"language: {lang}")
|
409
|
+
|
410
|
+
if not candidate_models:
|
411
|
+
return None
|
412
|
+
|
413
|
+
# Remove duplicates while preserving order
|
414
|
+
unique_candidates = list(dict.fromkeys(candidate_models))
|
415
|
+
|
416
|
+
# Check which models are actually available in our database
|
417
|
+
available_models = []
|
418
|
+
for model_id in unique_candidates:
|
419
|
+
if model_id in self.models_metadata:
|
420
|
+
model_info = self.models_metadata[model_id]
|
421
|
+
model_type = model_info.get('model_type')
|
422
|
+
|
423
|
+
# Filter by service type compatibility
|
424
|
+
if model_type == service_type or model_type == 'omni':
|
425
|
+
available_models.append({
|
426
|
+
"model_id": model_id,
|
427
|
+
"provider": model_info.get('provider', 'unknown'),
|
428
|
+
"model_type": model_type,
|
429
|
+
"entity_match_score": 1.0 # High score for entity matches
|
430
|
+
})
|
431
|
+
|
432
|
+
if not available_models:
|
433
|
+
logger.debug(f"No entity-based models available for {unique_candidates}")
|
434
|
+
return None
|
435
|
+
|
436
|
+
# Return the first available model (highest priority)
|
437
|
+
selected_model = available_models[0]
|
438
|
+
|
439
|
+
return {
|
440
|
+
"success": True,
|
441
|
+
"selected_model": {
|
442
|
+
"model_id": selected_model["model_id"],
|
443
|
+
"provider": selected_model["provider"]
|
444
|
+
},
|
445
|
+
"selection_reason": f"Entity-based match ({', '.join(reasoning_parts)})",
|
446
|
+
"alternatives": available_models[1:3],
|
447
|
+
"entity_match_score": selected_model["entity_match_score"],
|
448
|
+
"entities_detected": entities_info,
|
449
|
+
"method": "entity_based"
|
450
|
+
}
|
451
|
+
|
452
|
+
except Exception as e:
|
453
|
+
logger.error(f"Entity-based model selection failed: {e}")
|
454
|
+
return None
|
455
|
+
|
456
|
+
async def _select_model_by_similarity(
|
457
|
+
self,
|
458
|
+
request: str,
|
459
|
+
service_type: str,
|
460
|
+
entities_info: Dict[str, Any]
|
461
|
+
) -> Optional[Dict[str, Any]]:
|
462
|
+
"""Select model using embedding similarity (enhanced with entity info)"""
|
463
|
+
try:
|
143
464
|
if not self.embedding_service:
|
144
|
-
return
|
465
|
+
return None
|
145
466
|
|
146
467
|
request_embedding = await self.embedding_service.create_text_embedding(request)
|
147
468
|
|
@@ -149,10 +470,39 @@ class IntelligentModelSelector:
|
|
149
470
|
candidates = await self._find_similar_models_supabase(request_embedding, service_type)
|
150
471
|
|
151
472
|
if not candidates:
|
152
|
-
return
|
473
|
+
return None
|
153
474
|
|
154
|
-
#
|
155
|
-
|
475
|
+
# Boost scores for models that match extracted entities
|
476
|
+
enhanced_candidates = []
|
477
|
+
for candidate in candidates:
|
478
|
+
model_id = candidate["model_id"]
|
479
|
+
base_score = candidate["similarity"]
|
480
|
+
|
481
|
+
# Apply entity-based boosting
|
482
|
+
entity_boost = 0.0
|
483
|
+
|
484
|
+
# Boost based on domain match
|
485
|
+
for domain in entities_info.get("domains", []):
|
486
|
+
if domain in model_id.lower() or any(domain in desc.lower() for desc in [candidate.get("description", "")]):
|
487
|
+
entity_boost += 0.1
|
488
|
+
|
489
|
+
# Boost based on task match
|
490
|
+
for task in entities_info.get("tasks", []):
|
491
|
+
if task in model_id.lower() or any(task in desc.lower() for desc in [candidate.get("description", "")]):
|
492
|
+
entity_boost += 0.1
|
493
|
+
|
494
|
+
# Apply boost
|
495
|
+
enhanced_score = min(base_score + entity_boost, 1.0)
|
496
|
+
|
497
|
+
enhanced_candidate = candidate.copy()
|
498
|
+
enhanced_candidate["similarity"] = enhanced_score
|
499
|
+
enhanced_candidate["entity_boost"] = entity_boost
|
500
|
+
enhanced_candidates.append(enhanced_candidate)
|
501
|
+
|
502
|
+
# Re-sort by enhanced similarity
|
503
|
+
enhanced_candidates.sort(key=lambda x: x["similarity"], reverse=True)
|
504
|
+
|
505
|
+
best_match = enhanced_candidates[0]
|
156
506
|
|
157
507
|
return {
|
158
508
|
"success": True,
|
@@ -160,14 +510,16 @@ class IntelligentModelSelector:
|
|
160
510
|
"model_id": best_match["model_id"],
|
161
511
|
"provider": best_match["provider"]
|
162
512
|
},
|
163
|
-
"selection_reason": f"
|
164
|
-
"alternatives":
|
165
|
-
"similarity_score": best_match["similarity"]
|
513
|
+
"selection_reason": f"Enhanced similarity match (base: {best_match['similarity']:.3f}, entity boost: {best_match.get('entity_boost', 0):.3f})",
|
514
|
+
"alternatives": enhanced_candidates[1:3],
|
515
|
+
"similarity_score": best_match["similarity"],
|
516
|
+
"entities_detected": entities_info,
|
517
|
+
"method": "enhanced_similarity"
|
166
518
|
}
|
167
519
|
|
168
520
|
except Exception as e:
|
169
|
-
logger.error(f"
|
170
|
-
return
|
521
|
+
logger.error(f"Similarity-based model selection failed: {e}")
|
522
|
+
return None
|
171
523
|
|
172
524
|
async def _find_similar_models_supabase(
|
173
525
|
self,
|
@@ -177,7 +529,7 @@ class IntelligentModelSelector:
|
|
177
529
|
"""Find similar models using Supabase and embedding service similarity"""
|
178
530
|
try:
|
179
531
|
# Get all model embeddings from database
|
180
|
-
embeddings_result =
|
532
|
+
embeddings_result = get_supabase_table('model_embeddings').select('*').execute()
|
181
533
|
model_embeddings = embeddings_result.data
|
182
534
|
|
183
535
|
if not model_embeddings:
|
@@ -240,15 +592,41 @@ class IntelligentModelSelector:
|
|
240
592
|
"similarity_score": 0.0
|
241
593
|
}
|
242
594
|
|
595
|
+
def get_rate_limit_fallback(self, service_type: str, original_provider: str = "openai") -> Dict[str, Any]:
|
596
|
+
"""
|
597
|
+
Get fallback model when hitting rate limits
|
598
|
+
|
599
|
+
Args:
|
600
|
+
service_type: Type of service (text, vision, etc.)
|
601
|
+
original_provider: The provider that hit rate limit
|
602
|
+
|
603
|
+
Returns:
|
604
|
+
Fallback model selection result
|
605
|
+
"""
|
606
|
+
if original_provider == "openai" and service_type in self.rate_limit_fallbacks:
|
607
|
+
fallback = self.rate_limit_fallbacks[service_type]
|
608
|
+
|
609
|
+
return {
|
610
|
+
"success": True,
|
611
|
+
"selected_model": fallback,
|
612
|
+
"selection_reason": f"Rate limit fallback from {original_provider} to {fallback['provider']}",
|
613
|
+
"alternatives": [],
|
614
|
+
"is_fallback": True,
|
615
|
+
"original_provider": original_provider
|
616
|
+
}
|
617
|
+
|
618
|
+
# If no specific fallback, return default
|
619
|
+
return self._get_default_selection(service_type, f"No fallback available for {original_provider}")
|
620
|
+
|
243
621
|
async def get_available_models(self, service_type: Optional[str] = None) -> List[Dict[str, Any]]:
|
244
622
|
"""Get list of available models"""
|
245
623
|
try:
|
246
624
|
if service_type:
|
247
625
|
# Filter by service type
|
248
|
-
query =
|
626
|
+
query = get_supabase_table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
|
249
627
|
else:
|
250
628
|
# Get all models
|
251
|
-
query =
|
629
|
+
query = get_supabase_table('models').select('*')
|
252
630
|
|
253
631
|
result = query.order('model_id').execute()
|
254
632
|
return result.data
|
isa_model/core/types.py
CHANGED
isa_model/deployment/__init__.py
CHANGED
@@ -1,54 +1,11 @@
|
|
1
1
|
"""
|
2
|
-
ISA Model Deployment
|
2
|
+
ISA Model Deployment - Multi-provider deployment system
|
3
3
|
|
4
|
-
|
5
|
-
- Multi-cloud deployment (RunPod, AWS, GCP, Azure)
|
6
|
-
- Multiple inference engines (Triton, vLLM, TensorRT-LLM)
|
7
|
-
- Model optimization and containerization
|
8
|
-
- Deployment monitoring and management
|
9
|
-
|
10
|
-
Main Components:
|
11
|
-
- DeploymentManager: Orchestrates complete deployment workflow
|
12
|
-
- DeploymentConfig: Configuration classes for different deployment scenarios
|
13
|
-
- Cloud providers: RunPod, AWS, GCP, Azure integrations
|
14
|
-
- Inference engines: Triton, vLLM, TensorRT-LLM support
|
4
|
+
Unified deployment architecture supporting Modal and Triton platforms.
|
15
5
|
"""
|
16
6
|
|
7
|
+
from .modal.deployer import ModalDeployer
|
8
|
+
from .triton.provider import TritonProvider
|
17
9
|
from .core.deployment_manager import DeploymentManager
|
18
|
-
from .core.deployment_config import (
|
19
|
-
DeploymentConfig,
|
20
|
-
DeploymentProvider,
|
21
|
-
InferenceEngine,
|
22
|
-
ModelConfig,
|
23
|
-
ModelFormat,
|
24
|
-
TritonConfig,
|
25
|
-
RunPodServerlessConfig,
|
26
|
-
create_gemma_runpod_triton_config,
|
27
|
-
create_local_triton_config
|
28
|
-
)
|
29
|
-
from .services import AutoDeployVisionService
|
30
|
-
|
31
|
-
__all__ = [
|
32
|
-
# Main classes
|
33
|
-
"DeploymentManager",
|
34
|
-
"DeploymentConfig",
|
35
|
-
"AutoDeployVisionService",
|
36
|
-
|
37
|
-
# Configuration classes
|
38
|
-
"ModelConfig",
|
39
|
-
"TritonConfig",
|
40
|
-
"RunPodServerlessConfig",
|
41
|
-
|
42
|
-
# Enums
|
43
|
-
"DeploymentProvider",
|
44
|
-
"InferenceEngine",
|
45
|
-
"ModelFormat",
|
46
|
-
|
47
|
-
# Helper functions
|
48
|
-
"create_gemma_runpod_triton_config",
|
49
|
-
"create_local_triton_config"
|
50
|
-
]
|
51
10
|
|
52
|
-
|
53
|
-
__version__ = "0.1.0"
|
54
|
-
__author__ = "ISA Model Team"
|
11
|
+
__all__ = ["ModalDeployer", "TritonProvider", "DeploymentManager"]
|
@@ -1,34 +1,5 @@
|
|
1
|
-
"""
|
2
|
-
Deployment Core Module
|
3
|
-
|
4
|
-
Contains the core deployment functionality including configuration management
|
5
|
-
and deployment orchestration.
|
6
|
-
"""
|
1
|
+
"""Core deployment functionality"""
|
7
2
|
|
8
3
|
from .deployment_manager import DeploymentManager
|
9
|
-
from .deployment_config import (
|
10
|
-
DeploymentConfig,
|
11
|
-
DeploymentProvider,
|
12
|
-
InferenceEngine,
|
13
|
-
ModelConfig,
|
14
|
-
ModelFormat,
|
15
|
-
TritonConfig,
|
16
|
-
RunPodServerlessConfig,
|
17
|
-
create_gemma_runpod_triton_config,
|
18
|
-
create_local_triton_config
|
19
|
-
)
|
20
|
-
from .isa_deployment_service import ISADeploymentService
|
21
4
|
|
22
|
-
__all__ = [
|
23
|
-
"DeploymentManager",
|
24
|
-
"DeploymentConfig",
|
25
|
-
"DeploymentProvider",
|
26
|
-
"InferenceEngine",
|
27
|
-
"ModelConfig",
|
28
|
-
"ModelFormat",
|
29
|
-
"TritonConfig",
|
30
|
-
"RunPodServerlessConfig",
|
31
|
-
"ISADeploymentService",
|
32
|
-
"create_gemma_runpod_triton_config",
|
33
|
-
"create_local_triton_config"
|
34
|
-
]
|
5
|
+
__all__ = ["DeploymentManager"]
|