isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/core/services/intelligent_model_selector.py CHANGED
@@ -13,7 +13,7 @@ from typing import Dict, List, Any, Optional, Tuple

 logger = logging.getLogger(__name__)

-from ..database.supabase_client import get_supabase_client
+from ..database.supabase_client import get_supabase_client, get_supabase_table
 from ...inference.ai_factory import AIFactory


@@ -32,16 +32,45 @@ class IntelligentModelSelector:
         self.config = config or {}
         self.supabase_client = None
         self.embedding_service = None
+        self.nlp = None  # spaCy NLP model
         self.models_metadata: Dict[str, Dict[str, Any]] = {}

-        # Default models for each service type
+        # Default models for each service type (updated to GPT-5-nano for best cost/performance)
         self.default_models = {
-            "vision": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "vision": {"model_id": "gpt-5-mini", "provider": "openai"},
             "audio": {"model_id": "whisper-1", "provider": "openai"},
-            "text": {"model_id": "gpt-4.1-mini", "provider": "openai"},
+            "text": {"model_id": "gpt-5-nano", "provider": "openai"},  # Primary: 50% cheaper than gpt-4.1-nano
             "image": {"model_id": "black-forest-labs/flux-schnell", "provider": "replicate"},
             "embedding": {"model_id": "text-embedding-3-small", "provider": "openai"},
-            "omni": {"model_id": "gpt-4.1", "provider": "openai"}
+            "omni": {"model_id": "gpt-5", "provider": "openai"}
+        }
+
+        # Rate limit fallback: same models with different providers
+        self.rate_limit_fallbacks = {
+            "text": {"model_id": "gpt-5-nano", "provider": "yyds"},  # Same model, yyds provider
+            "vision": {"model_id": "gpt-5-mini", "provider": "yyds"},
+            "omni": {"model_id": "gpt-5", "provider": "yyds"}
+        }
+
+        # Entity-based model mappings
+        self.entity_model_mappings = {
+            # Domain-specific mappings
+            "medical": {"preferred_models": ["microsoft/BioGPT", "medalpaca/medalpaca-7b"]},
+            "legal": {"preferred_models": ["saul-7b", "legal-bert"]},
+            "financial": {"preferred_models": ["ProsusAI/finbert", "financialbert"]},
+            "scientific": {"preferred_models": ["microsoft/DialoGPT-medium", "allenai/scibert"]},
+            "code": {"preferred_models": ["microsoft/CodeBERT", "codeparrot/codeparrot"]},
+
+            # Task-specific mappings
+            "translation": {"preferred_models": ["facebook/m2m100", "google/mt5"]},
+            "summarization": {"preferred_models": ["facebook/bart-large", "google/pegasus"]},
+            "question_answering": {"preferred_models": ["deepset/roberta-base-squad2", "distilbert-base-uncased-distilled-squad"]},
+            "sentiment": {"preferred_models": ["cardiffnlp/twitter-roberta-base-sentiment", "nlptown/bert-base-multilingual-uncased-sentiment"]},
+
+            # Language-specific mappings
+            "chinese": {"preferred_models": ["THUDM/chatglm2-6b", "baichuan-inc/Baichuan2-7B-Chat"]},
+            "japanese": {"preferred_models": ["rinna/japanese-gpt2-medium", "sonoisa/sentence-bert-base-ja-mean-tokens"]},
+            "multilingual": {"preferred_models": ["facebook/mbart-large-50", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"]}
         }

         logger.info("Intelligent Model Selector initialized")
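The three tables added above layer together: entity_model_mappings narrows candidates by detected domain, task, or language; default_models supplies the per-service default; and rate_limit_fallbacks re-routes the same model through the yyds provider when OpenAI throttles. A minimal sketch of that lookup order, assuming only the three attributes shown in this hunk (the resolve_model helper and its arguments are illustrative, not part of the package):

    from typing import Any, Dict, Optional

    # Illustrative only: mirrors the precedence implied by the new tables.
    def resolve_model(selector, service_type: str, domain: Optional[str] = None,
                      rate_limited: bool = False) -> Dict[str, Any]:
        if domain and domain in selector.entity_model_mappings:
            # Domain hit: take the first preferred model for that domain.
            return {"model_id": selector.entity_model_mappings[domain]["preferred_models"][0]}
        if rate_limited and service_type in selector.rate_limit_fallbacks:
            return selector.rate_limit_fallbacks[service_type]   # e.g. {"model_id": "gpt-5-nano", "provider": "yyds"}
        return selector.default_models[service_type]             # e.g. {"model_id": "gpt-5-nano", "provider": "openai"}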
@@ -56,6 +85,9 @@ class IntelligentModelSelector:
         # Initialize embedding service
         await self._init_embedding_service()

+        # Initialize spaCy NLP
+        await self._init_spacy_nlp()
+
         # Load models from database
         await self._load_models_from_database()

@@ -74,11 +106,194 @@ class IntelligentModelSelector:
         except Exception as e:
             logger.warning(f"Failed to initialize embedding service: {e}")

+    async def _init_spacy_nlp(self):
+        """Initialize spaCy NLP model for entity extraction"""
+        try:
+            import spacy
+
+            # Try to load the English model
+            models_to_try = ["en_core_web_sm", "en_core_web_md", "en_core_web_lg"]
+
+            for model_name in models_to_try:
+                try:
+                    self.nlp = spacy.load(model_name)
+                    logger.info(f"spaCy model '{model_name}' loaded successfully")
+                    break
+                except OSError:
+                    logger.warning(f"spaCy model '{model_name}' not found")
+                    continue
+
+            if not self.nlp:
+                # Try to download the smallest model automatically
+                try:
+                    import subprocess
+                    result = subprocess.run(
+                        ["python", "-m", "spacy", "download", "en_core_web_sm"],
+                        capture_output=True,
+                        text=True,
+                        timeout=300  # 5 minutes timeout
+                    )
+
+                    if result.returncode == 0:
+                        self.nlp = spacy.load("en_core_web_sm")
+                        logger.info("spaCy en_core_web_sm downloaded and loaded successfully")
+                    else:
+                        logger.warning(f"Failed to download spaCy model: {result.stderr}")
+
+                except Exception as download_error:
+                    logger.warning(f"Failed to download spaCy model: {download_error}")
+
+            # If still no model, create a blank model with NER
+            if not self.nlp:
+                logger.warning("No spaCy model available, creating blank model with NER")
+                self.nlp = spacy.blank("en")
+                # Add basic NER component
+                try:
+                    self.nlp.add_pipe("ner")
+                except:
+                    pass  # NER might not be available in blank model
+
+        except ImportError:
+            # spaCy not available, will use regex fallback
+            self.nlp = None
+        except Exception as e:
+            # Failed to init spaCy, will use regex fallback
+            self.nlp = None
+
+    def extract_entities_and_keywords(self, text: str) -> Dict[str, Any]:
+        """Extract entities and keywords from text using spaCy and heuristics"""
+        try:
+            entities_info = {
+                "domains": set(),
+                "tasks": set(),
+                "languages": set(),
+                "technical_terms": set(),
+                "named_entities": {},
+                "keywords": set()
+            }
+
+            # Use spaCy if available
+            if self.nlp:
+                try:
+                    doc = self.nlp(text)
+
+                    # Extract named entities
+                    for ent in doc.ents:
+                        label = ent.label_
+                        if label not in entities_info["named_entities"]:
+                            entities_info["named_entities"][label] = []
+                        entities_info["named_entities"][label].append(ent.text)
+
+                    # Extract noun phrases as potential keywords
+                    for chunk in doc.noun_chunks:
+                        entities_info["keywords"].add(chunk.text.lower())
+
+                except Exception as e:
+                    logger.warning(f"spaCy processing failed: {e}")
+
+            # Heuristic-based extraction
+            text_lower = text.lower()
+
+            # Domain detection
+            domain_keywords = {
+                "medical": ["medical", "health", "doctor", "patient", "diagnosis", "treatment", "clinical", "healthcare", "hospital", "medicine", "drug"],
+                "legal": ["legal", "law", "court", "lawyer", "attorney", "contract", "litigation", "compliance", "regulation", "statute"],
+                "financial": ["financial", "finance", "banking", "investment", "trading", "market", "stock", "currency", "economic", "accounting"],
+                "scientific": ["scientific", "research", "study", "experiment", "analysis", "data", "hypothesis", "theory", "academic", "journal"],
+                "code": ["code", "programming", "software", "development", "algorithm", "function", "variable", "debug", "compile", "syntax"],
+                "educational": ["education", "learning", "teaching", "student", "school", "university", "course", "lesson", "curriculum"]
+            }
+
+            # Task detection
+            task_keywords = {
+                "translation": ["translate", "translation", "language", "multilingual", "convert"],
+                "summarization": ["summarize", "summary", "summarization", "abstract", "brief", "condense"],
+                "question_answering": ["question", "answer", "qa", "ask", "respond", "query"],
+                "sentiment": ["sentiment", "emotion", "feeling", "opinion", "positive", "negative", "mood"],
+                "classification": ["classify", "classification", "category", "categorize", "label", "predict"],
+                "generation": ["generate", "creation", "create", "produce", "write", "compose"]
+            }
+
+            # Language detection
+            language_keywords = {
+                "chinese": ["chinese", "mandarin", "cantonese", "zh", "中文", "汉语"],
+                "japanese": ["japanese", "日本語", "ja", "nihongo"],
+                "spanish": ["spanish", "español", "es", "castellano"],
+                "french": ["french", "français", "fr", "francais"],
+                "german": ["german", "deutsch", "de", "german"],
+                "multilingual": ["multilingual", "multiple languages", "multi-language", "cross-lingual"]
+            }
+
+            # Extract domains
+            for domain, keywords in domain_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["domains"].add(domain)
+
+            # Extract tasks
+            for task, keywords in task_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["tasks"].add(task)
+
+            # Extract languages
+            for lang, keywords in language_keywords.items():
+                if any(keyword in text_lower for keyword in keywords):
+                    entities_info["languages"].add(lang)
+
+            # Extract technical terms and model names
+            import re
+
+            # Add specific model name patterns
+            model_patterns = [
+                r'\bgpt-[0-9]+\.?[0-9]*-?\w*\b',  # GPT models (gpt-4, gpt-5-nano, etc)
+                r'\bclaude-[0-9]+\.?[0-9]*-?\w*\b',  # Claude models
+                r'\bllama-?\d*-?\w*\b',  # Llama models
+                r'\bgemini-?\w*\b',  # Gemini models
+                r'\bmistral-?\w*\b',  # Mistral models
+            ]
+
+            for pattern in model_patterns:
+                matches = re.findall(pattern, text_lower)
+                entities_info["technical_terms"].update(matches)
+                entities_info["keywords"].update(matches)
+
+            # General technical patterns
+            tech_patterns = [
+                r'\b\w*bert\w*\b',  # BERT variants
+                r'\b\w*gpt\w*\b',  # GPT variants
+                r'\b\w*llm\w*\b',  # LLM variants
+                r'\b\w*ai\w*\b',  # AI-related
+                r'\b\w*ml\w*\b',  # ML-related
+                r'\b\w*neural\w*\b',  # Neural networks
+            ]
+
+            for pattern in tech_patterns:
+                matches = re.findall(pattern, text_lower, re.IGNORECASE)
+                for match in matches:
+                    if len(match) > 2:  # Filter out too short matches
+                        entities_info["technical_terms"].add(match)
+
+            # Convert sets to lists for JSON serialization
+            return {
+                key: list(value) if isinstance(value, set) else value
+                for key, value in entities_info.items()
+            }
+
+        except Exception as e:
+            logger.error(f"Entity extraction failed: {e}")
+            return {
+                "domains": [],
+                "tasks": [],
+                "languages": [],
+                "technical_terms": [],
+                "named_entities": {},
+                "keywords": []
+            }
+
     async def _load_models_from_database(self):
         """Load models from database registry"""
         try:
             # Get all models from database
-            result = self.supabase_client.table('models').select('*').execute()
+            result = get_supabase_table('models').select('*').execute()
             models = result.data

             logger.info(f"Found {len(models)} models in database registry")
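extract_entities_and_keywords always returns the same six keys, with the internal sets converted to lists for JSON serialization. A short usage sketch follows; the no-argument construction is an assumption, since the diff shows method bodies but not the full constructor contract:

    # Assumes IntelligentModelSelector() accepts a default (None) config.
    selector = IntelligentModelSelector()
    info = selector.extract_entities_and_keywords(
        "Summarize this clinical trial report and translate it to Chinese"
    )
    # Always present: domains, tasks, languages, technical_terms, named_entities, keywords.
    # With the keyword tables above, this text yields (list order not guaranteed):
    #   info["domains"]   contains "medical"          ("clinical" matches the medical list)
    #   info["tasks"]     contains "summarization" and "translation"
    #   info["languages"] contains "chinese"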
@@ -105,7 +320,7 @@ class IntelligentModelSelector:
             }

             # Check embeddings status
-            embeddings_result = self.supabase_client.table('model_embeddings').select('model_id').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('model_id').execute()
             existing_embeddings = {row['model_id'] for row in embeddings_result.data}

             logger.info(f"Found {len(existing_embeddings)} model embeddings")
@@ -128,7 +343,7 @@ class IntelligentModelSelector:
         context: Optional[Dict[str, Any]] = None
     ) -> Dict[str, Any]:
         """
-        Select best model using similarity matching
+        Select best model using entity extraction and similarity matching

         Args:
             request: User's request/query
@@ -139,9 +354,115 @@ class IntelligentModelSelector:
             Selection result with model info and reasoning
         """
         try:
-            # Get embedding for user request
+            # Extract entities and keywords from the request
+            entities_info = self.extract_entities_and_keywords(request)
+            logger.debug(f"Extracted entities: {entities_info}")
+
+            # Try entity-based selection first
+            entity_based_result = await self._select_model_by_entities(entities_info, service_type, request)
+            if entity_based_result and entity_based_result.get("success"):
+                return entity_based_result
+
+            # Fallback to similarity-based selection
+            similarity_result = await self._select_model_by_similarity(request, service_type, entities_info)
+            if similarity_result and similarity_result.get("success"):
+                return similarity_result
+
+            # Final fallback to default
+            return self._get_default_selection(service_type, "No suitable models found after entity and similarity matching")
+
+        except Exception as e:
+            logger.error(f"Model selection failed: {e}")
+            return self._get_default_selection(service_type, f"Selection error: {e}")
+
+    async def _select_model_by_entities(
+        self,
+        entities_info: Dict[str, Any],
+        service_type: str,
+        request: str
+    ) -> Optional[Dict[str, Any]]:
+        """Select model based on extracted entities"""
+        try:
+            reasoning_parts = []
+            candidate_models = []
+
+            # Check for domain-specific models
+            for domain in entities_info.get("domains", []):
+                if domain in self.entity_model_mappings:
+                    models = self.entity_model_mappings[domain]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"domain: {domain}")
+
+            # Check for task-specific models
+            for task in entities_info.get("tasks", []):
+                if task in self.entity_model_mappings:
+                    models = self.entity_model_mappings[task]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"task: {task}")
+
+            # Check for language-specific models
+            for lang in entities_info.get("languages", []):
+                if lang in self.entity_model_mappings:
+                    models = self.entity_model_mappings[lang]["preferred_models"]
+                    candidate_models.extend(models)
+                    reasoning_parts.append(f"language: {lang}")
+
+            if not candidate_models:
+                return None
+
+            # Remove duplicates while preserving order
+            unique_candidates = list(dict.fromkeys(candidate_models))
+
+            # Check which models are actually available in our database
+            available_models = []
+            for model_id in unique_candidates:
+                if model_id in self.models_metadata:
+                    model_info = self.models_metadata[model_id]
+                    model_type = model_info.get('model_type')
+
+                    # Filter by service type compatibility
+                    if model_type == service_type or model_type == 'omni':
+                        available_models.append({
+                            "model_id": model_id,
+                            "provider": model_info.get('provider', 'unknown'),
+                            "model_type": model_type,
+                            "entity_match_score": 1.0  # High score for entity matches
+                        })
+
+            if not available_models:
+                logger.debug(f"No entity-based models available for {unique_candidates}")
+                return None
+
+            # Return the first available model (highest priority)
+            selected_model = available_models[0]
+
+            return {
+                "success": True,
+                "selected_model": {
+                    "model_id": selected_model["model_id"],
+                    "provider": selected_model["provider"]
+                },
+                "selection_reason": f"Entity-based match ({', '.join(reasoning_parts)})",
+                "alternatives": available_models[1:3],
+                "entity_match_score": selected_model["entity_match_score"],
+                "entities_detected": entities_info,
+                "method": "entity_based"
+            }
+
+        except Exception as e:
+            logger.error(f"Entity-based model selection failed: {e}")
+            return None
+
+    async def _select_model_by_similarity(
+        self,
+        request: str,
+        service_type: str,
+        entities_info: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Select model using embedding similarity (enhanced with entity info)"""
+        try:
             if not self.embedding_service:
-                return self._get_default_selection(service_type, "No embedding service available")
+                return None

             request_embedding = await self.embedding_service.create_text_embedding(request)

@@ -149,10 +470,39 @@ class IntelligentModelSelector:
             candidates = await self._find_similar_models_supabase(request_embedding, service_type)

             if not candidates:
-                return self._get_default_selection(service_type, "No suitable models found")
+                return None

-            # Return best match
-            best_match = candidates[0]
+            # Boost scores for models that match extracted entities
+            enhanced_candidates = []
+            for candidate in candidates:
+                model_id = candidate["model_id"]
+                base_score = candidate["similarity"]
+
+                # Apply entity-based boosting
+                entity_boost = 0.0
+
+                # Boost based on domain match
+                for domain in entities_info.get("domains", []):
+                    if domain in model_id.lower() or any(domain in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Boost based on task match
+                for task in entities_info.get("tasks", []):
+                    if task in model_id.lower() or any(task in desc.lower() for desc in [candidate.get("description", "")]):
+                        entity_boost += 0.1
+
+                # Apply boost
+                enhanced_score = min(base_score + entity_boost, 1.0)
+
+                enhanced_candidate = candidate.copy()
+                enhanced_candidate["similarity"] = enhanced_score
+                enhanced_candidate["entity_boost"] = entity_boost
+                enhanced_candidates.append(enhanced_candidate)
+
+            # Re-sort by enhanced similarity
+            enhanced_candidates.sort(key=lambda x: x["similarity"], reverse=True)
+
+            best_match = enhanced_candidates[0]

             return {
                 "success": True,
@@ -160,14 +510,16 @@
                     "model_id": best_match["model_id"],
                     "provider": best_match["provider"]
                 },
-                "selection_reason": f"Best similarity match (score: {best_match['similarity']:.3f})",
-                "alternatives": candidates[1:3],  # Top 2 alternatives
-                "similarity_score": best_match["similarity"]
+                "selection_reason": f"Enhanced similarity match (base: {best_match['similarity']:.3f}, entity boost: {best_match.get('entity_boost', 0):.3f})",
+                "alternatives": enhanced_candidates[1:3],
+                "similarity_score": best_match["similarity"],
+                "entities_detected": entities_info,
+                "method": "enhanced_similarity"
             }

         except Exception as e:
-            logger.error(f"Model selection failed: {e}")
-            return self._get_default_selection(service_type, f"Selection error: {e}")
+            logger.error(f"Similarity-based model selection failed: {e}")
+            return None

     async def _find_similar_models_supabase(
         self,
@@ -177,7 +529,7 @@ class IntelligentModelSelector:
         """Find similar models using Supabase and embedding service similarity"""
         try:
             # Get all model embeddings from database
-            embeddings_result = self.supabase_client.table('model_embeddings').select('*').execute()
+            embeddings_result = get_supabase_table('model_embeddings').select('*').execute()
             model_embeddings = embeddings_result.data

             if not model_embeddings:
@@ -240,15 +592,41 @@ class IntelligentModelSelector:
             "similarity_score": 0.0
         }

+    def get_rate_limit_fallback(self, service_type: str, original_provider: str = "openai") -> Dict[str, Any]:
+        """
+        Get fallback model when hitting rate limits
+
+        Args:
+            service_type: Type of service (text, vision, etc.)
+            original_provider: The provider that hit rate limit
+
+        Returns:
+            Fallback model selection result
+        """
+        if original_provider == "openai" and service_type in self.rate_limit_fallbacks:
+            fallback = self.rate_limit_fallbacks[service_type]
+
+            return {
+                "success": True,
+                "selected_model": fallback,
+                "selection_reason": f"Rate limit fallback from {original_provider} to {fallback['provider']}",
+                "alternatives": [],
+                "is_fallback": True,
+                "original_provider": original_provider
+            }
+
+        # If no specific fallback, return default
+        return self._get_default_selection(service_type, f"No fallback available for {original_provider}")
+
     async def get_available_models(self, service_type: Optional[str] = None) -> List[Dict[str, Any]]:
         """Get list of available models"""
         try:
             if service_type:
                 # Filter by service type
-                query = self.supabase_client.table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
+                query = get_supabase_table('models').select('*').or_(f'model_type.eq.{service_type},model_type.eq.omni')
             else:
                 # Get all models
-                query = self.supabase_client.table('models').select('*')
+                query = get_supabase_table('models').select('*')

             result = query.order('model_id').execute()
             return result.data
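With these hunks in place, selection proceeds entity match first, then entity-boosted embedding similarity, then the static default, and get_rate_limit_fallback offers a synchronous provider swap for rate-limit errors. A usage sketch; the initialize() and select_model() names and the no-argument constructor are assumptions, since the diff shows their bodies but not every signature:

    import asyncio

    async def demo() -> None:
        selector = IntelligentModelSelector()       # assumed: default construction
        await selector.initialize()                 # assumed name of the async setup entry point
        choice = await selector.select_model(       # assumed name of the coroutine whose body appears above
            "Summarize this legal contract", service_type="text"
        )
        print(choice["selected_model"], choice["selection_reason"])

        # On an OpenAI rate limit, callers can swap provider for the same model:
        fallback = selector.get_rate_limit_fallback("text", original_provider="openai")
        print(fallback["selected_model"])           # {"model_id": "gpt-5-nano", "provider": "yyds"}

    asyncio.run(demo())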
isa_model/core/types.py CHANGED
@@ -188,6 +188,7 @@ class Provider(str, Enum):
     ANTHROPIC = "anthropic"
     GOOGLE = "google"
     YYDS = "yyds"
+    CEREBRAS = "cerebras"
     MODAL = "modal"

 # ===== DATA CLASSES =====
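Because Provider subclasses str, the new member compares equal to its literal value, so existing string-based provider checks keep working:

    from isa_model.core.types import Provider

    assert Provider.CEREBRAS == "cerebras"            # str-backed enum: equal to its value
    assert Provider("cerebras") is Provider.CEREBRAS  # lookup by value returns the member
    print([p.value for p in (Provider.YYDS, Provider.CEREBRAS, Provider.MODAL)])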
isa_model/deployment/__init__.py CHANGED
@@ -1,54 +1,11 @@
 """
-ISA Model Deployment Module
+ISA Model Deployment - Multi-provider deployment system

-Provides comprehensive deployment capabilities for AI models including:
-- Multi-cloud deployment (RunPod, AWS, GCP, Azure)
-- Multiple inference engines (Triton, vLLM, TensorRT-LLM)
-- Model optimization and containerization
-- Deployment monitoring and management
-
-Main Components:
-- DeploymentManager: Orchestrates complete deployment workflow
-- DeploymentConfig: Configuration classes for different deployment scenarios
-- Cloud providers: RunPod, AWS, GCP, Azure integrations
-- Inference engines: Triton, vLLM, TensorRT-LLM support
+Unified deployment architecture supporting Modal and Triton platforms.
 """

+from .modal.deployer import ModalDeployer
+from .triton.provider import TritonProvider
 from .core.deployment_manager import DeploymentManager
-from .core.deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .services import AutoDeployVisionService
-
-__all__ = [
-    # Main classes
-    "DeploymentManager",
-    "DeploymentConfig",
-    "AutoDeployVisionService",
-
-    # Configuration classes
-    "ModelConfig",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-
-    # Enums
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelFormat",
-
-    # Helper functions
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]

-# Version info
-__version__ = "0.1.0"
-__author__ = "ISA Model Team"
+__all__ = ["ModalDeployer", "TritonProvider", "DeploymentManager"]
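The package root now re-exports exactly three names, so the RunPod and deployment_config helpers that 0.4.0 exposed here are gone and downstream imports reduce to the following (constructor signatures are not part of this diff, so none are shown):

    import isa_model.deployment as deployment

    print(deployment.__all__)   # ['ModalDeployer', 'TritonProvider', 'DeploymentManager']

    from isa_model.deployment import DeploymentManager, ModalDeployer, TritonProvider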
isa_model/deployment/core/__init__.py CHANGED
@@ -1,34 +1,5 @@
-"""
-Deployment Core Module
-
-Contains the core deployment functionality including configuration management
-and deployment orchestration.
-"""
+"""Core deployment functionality"""

 from .deployment_manager import DeploymentManager
-from .deployment_config import (
-    DeploymentConfig,
-    DeploymentProvider,
-    InferenceEngine,
-    ModelConfig,
-    ModelFormat,
-    TritonConfig,
-    RunPodServerlessConfig,
-    create_gemma_runpod_triton_config,
-    create_local_triton_config
-)
-from .isa_deployment_service import ISADeploymentService

-__all__ = [
-    "DeploymentManager",
-    "DeploymentConfig",
-    "DeploymentProvider",
-    "InferenceEngine",
-    "ModelConfig",
-    "ModelFormat",
-    "TritonConfig",
-    "RunPodServerlessConfig",
-    "ISADeploymentService",
-    "create_gemma_runpod_triton_config",
-    "create_local_triton_config"
-]
+__all__ = ["DeploymentManager"]