PyPI - isa-model - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

isa_model/client.py +200 -6
isa_model/deployment/services/auto_deploy_vision_service.py +4 -3
isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
isa_model/inference/ai_factory.py +83 -3
isa_model/serving/api/routes/unified.py +72 -0
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/METADATA +1 -1
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/RECORD +9 -18
isa_model/inference/providers/__init__.py +0 -19
isa_model/inference/providers/base_provider.py +0 -77
isa_model/inference/providers/ml_provider.py +0 -50
isa_model/inference/providers/modal_provider.py +0 -109
isa_model/inference/providers/model_cache_manager.py +0 -341
isa_model/inference/providers/ollama_provider.py +0 -92
isa_model/inference/providers/openai_provider.py +0 -130
isa_model/inference/providers/replicate_provider.py +0 -119
isa_model/inference/providers/triton_provider.py +0 -439
isa_model/inference/providers/yyds_provider.py +0 -108
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/WHEEL +0 -0
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/top_level.txt +0 -0

isa_model/serving/api/routes/unified.py CHANGED Viewed

@@ -10,10 +10,12 @@ This is the main API that handles all types of AI requests:
 """
 from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
 from typing import Optional, Dict, Any, Union, List
 import logging
 import asyncio
+import json
 from pathlib import Path
 from isa_model.client import ISAModelClient
@@ -28,6 +30,7 @@ class UnifiedRequest(BaseModel):
     service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
     model_hint: Optional[str] = Field(None, description="Optional model preference")
     provider_hint: Optional[str] = Field(None, description="Optional provider preference")
+    stream: Optional[bool] = Field(False, description="Enable streaming for text services")
     parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
 class UnifiedResponse(BaseModel):
@@ -108,6 +111,75 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
             }
         )
+@router.post("/stream")
+async def unified_stream(request: UnifiedRequest):
+    """
+    **Unified streaming endpoint for text services**
+    Returns Server-Sent Events (SSE) stream for real-time token generation.
+    Only supports text service types.
+    """
+    try:
+        # Validate streaming request
+        if request.service_type != "text":
+            raise HTTPException(status_code=400, detail="Streaming only supported for text services")
+        # Get ISA client instance (local mode)
+        client = get_isa_client()
+        async def generate_stream():
+            """Generator for SSE streaming"""
+            try:
+                # Use client's streaming method
+                stream_gen = await client.invoke(
+                    input_data=request.input_data,
+                    task=request.task,
+                    service_type=request.service_type,
+                    model_hint=request.model_hint,
+                    provider_hint=request.provider_hint,
+                    stream=True,
+                    **request.parameters
+                )
+                # Stream tokens as SSE format
+                async for token in stream_gen:
+                    # SSE format: "data: {json}\n\n"
+                    token_data = {
+                        "token": token,
+                        "type": "token"
+                    }
+                    yield f"data: {json.dumps(token_data)}\n\n"
+                # Send completion signal
+                completion_data = {
+                    "type": "completion",
+                    "status": "finished"
+                }
+                yield f"data: {json.dumps(completion_data)}\n\n"
+            except Exception as e:
+                logger.error(f"Streaming error: {e}")
+                error_data = {
+                    "type": "error",
+                    "error": str(e)
+                }
+                yield f"data: {json.dumps(error_data)}\n\n"
+        # Return SSE stream response
+        return StreamingResponse(
+            generate_stream(),
+            media_type="text/plain",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "Content-Type": "text/plain; charset=utf-8"
+            }
+        )
+    except Exception as e:
+        logger.error(f"Streaming setup failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @router.post("/invoke-file", response_model=UnifiedResponse)
 async def unified_invoke_file(
     task: str = Form(...),

{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: isa_model
-Version: 0.3.6
+Version: 0.3.8
 Summary: Unified AI model serving framework
 Author: isA_Model Contributors
 Classifier: Development Status :: 3 - Alpha

{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 isa_model/__init__.py,sha256=bAbHdrDVQ-mySC_GJtgfLNI8KPcs2LfReBkIOOtpaQQ,867
-isa_model/client.py,sha256=5u_hqGB1hcFX8MQdVYlCLqzs4ylQFY3rq91h3iTI24c,27500
+isa_model/client.py,sha256=oo2WcxElcD9WMtw6STj3vS9KwjI-DvS_ldqC1X-HRBs,35456
 isa_model/core/config.py,sha256=h9GVTEEMlaJYSCDd0W9q1KtaWTV5V5TawMsKtGuphds,15686
 isa_model/core/pricing_manager.py,sha256=b7HcZsBQ8ZSCzMepOhqN-J9kU43vhTxX5NabQS0aM70,17125
 isa_model/core/types.py,sha256=XLUs442WGNc8E0gF2M-nb6dutD_s-XCfpr2BfGBCA2M,8445
@@ -30,10 +30,11 @@ isa_model/deployment/gpu_int8_ds8/scripts/test_client.py,sha256=aCULgRYzEQj_ELUK
 isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py,sha256=XXrneTCHUeh1LNRcu-YtZQ5B4pNawlrxC-cTWmJU2A8,936
 isa_model/deployment/runtime/deployed_service.py,sha256=0Z_Hg42oXasEVvuKjwBylJPazcmJYXhS-L9uPainaIg,13400
 isa_model/deployment/services/__init__.py,sha256=JrLlmBlLb6RfiqGMzVVxKZfF5tAKliQqpon_rPoNoeA,216
-isa_model/deployment/services/auto_deploy_vision_service.py,sha256=Luo8FaXsEBoKjGw5HQ7veOnv9Eh0e7U0claXaGa3-1o,19624
+isa_model/deployment/services/auto_deploy_vision_service.py,sha256=bZmkNG2DWvG6DdHfHvUuf8fonygic4vI_A4aogrXzvU,19670
 isa_model/deployment/services/model_service.py,sha256=_ncC--8hr5BUwzCWh59yRXPKIPVLapx_31TorB2DIr8,13492
 isa_model/deployment/services/service_monitor.py,sha256=P1zGoeqkNEJwt9AXZF2qTjfSLRm5PKUa80GJVNDSIdA,15223
 isa_model/deployment/services/service_registry.py,sha256=LQgWQOvoP0lb7mC6WTS6shEt6WuX6xc8rRmcixrKwTc,22765
+isa_model/deployment/services/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
 isa_model/eval/__init__.py,sha256=CRbxC5SN7ow4ymdALSNTHawqw4f82DEdAb7twNT_Pw0,2447
 isa_model/eval/benchmarks.py,sha256=_L4Vwj2hwf2yhqoleIASO9z5e3LRCClCVEVCQbGt0I8,16885
 isa_model/eval/factory.py,sha256=bm5OVY7HIxdBgjlH1n7e5K1YO4ytv8e4KB7z_JS9HVQ,20737
@@ -46,19 +47,9 @@ isa_model/eval/evaluators/llm_evaluator.py,sha256=yfFJFdxwGV2F3mzEWjZ-0fr9u8SR3A
 isa_model/eval/infrastructure/__init__.py,sha256=fxTdtwAFtjCDOV9MJ3GbhY0A-DqKeTwr_u9WTBnGI_U,648
 isa_model/eval/infrastructure/experiment_tracker.py,sha256=yfMWIAk6oA8Lfer3AtmKg0OEZiGhczmsCD5gmp--uew,15283
 isa_model/inference/__init__.py,sha256=usfuQJ4zYY2RRtHkE-V6LuJ5aN7WJogtPUj9Qmy4Wvw,318
-isa_model/inference/ai_factory.py,sha256=IN-q3jNmcraZ-PWNTHyhdABoxxhIweZYcXO2fr_uXoM,16478
+isa_model/inference/ai_factory.py,sha256=oGtRd4wp6IZOTyI3GVKBNN4AtlnrLS7yFZuq2wvkaUg,19784
 isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
 isa_model/inference/adapter/unified_api.py,sha256=67_Ok8W20m6Otf6r9WyOEVpnxondP4UAxOASk9ozDk4,8668
-isa_model/inference/providers/__init__.py,sha256=a83q-LMFv8u47wf0XtxvqOw_mlVgA_90wtuwy02qdDE,581
-isa_model/inference/providers/base_provider.py,sha256=PT-YnGwBu-Jn_4T3iAphkAJw_mYmKVLjUID62vf2_Ow,2711
-isa_model/inference/providers/ml_provider.py,sha256=4oGGF7lVWQ91Qh3h7olyPFoACLxCROaMxUZlDiZrRL4,1661
-isa_model/inference/providers/modal_provider.py,sha256=klRYXECD5TJicodHIElsGNGMAsAWRFhvn4yfCRcqdVs,3654
-isa_model/inference/providers/model_cache_manager.py,sha256=dLRpx7OJweQ5LcSAkU7D0DQRfLtIhG6nGvg4W_gau80,15315
-isa_model/inference/providers/ollama_provider.py,sha256=IfM9XhdzfE1faguzS2-4GfhK30v5kDPecD3l4z2eB1w,3620
-isa_model/inference/providers/openai_provider.py,sha256=tB8FMsMivlRx0cfPJ0Yrxh1DCvuXyyjNFXrO4lMkkhA,5366
-isa_model/inference/providers/replicate_provider.py,sha256=0oi_BglIE6-HYgzLau9ifP8OdpAMO-QkwYk0OXRUzPk,4490
-isa_model/inference/providers/triton_provider.py,sha256=GKlth7cTOx6ERbsXXJ0gDNby3kVGQNULBDt098BXBSU,15258
-isa_model/inference/providers/yyds_provider.py,sha256=KbDsopShs11_G9oX3b2i2NgHIqkZV7HYWe9K9uZLccc,4284
 isa_model/inference/services/__init__.py,sha256=yfLz0YGl8ixk6LfTRL6cRTvZMb9F_Pv1QRgGyNc9xYM,386
 isa_model/inference/services/base_service.py,sha256=fVaSx0CogHK71UEsNJeSyM8mhqmq5_9ePbbSZVi3Al8,5085
 isa_model/inference/services/audio/base_stt_service.py,sha256=sfzAfreFdvEOBHtphoTrQSjb-gCoCOW4WCj6iIe51oU,5804
@@ -117,7 +108,7 @@ isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbd
 isa_model/serving/api/routes/health.py,sha256=NwQcC_bpcaI4YZHTIKbGtg82yQ6QLdp0TwcqbEiqbWs,2208
 isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
 isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
-isa_model/serving/api/routes/unified.py,sha256=rSoHPtMWkGlzFwzzoZeFlCmFGWj2r3q-5QH9VeNQQxA,7074
+isa_model/serving/api/routes/unified.py,sha256=r6O_X9ql2EDqiTWaWz_anPERwfzNnF9ZvSdjqht8WxE,9727
 isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
 isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
 isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
@@ -141,7 +132,7 @@ isa_model/training/core/config.py,sha256=oqgKpBvtzrN6jwLIQYQ2707lH6nmjrktRiSxp9i
 isa_model/training/core/dataset.py,sha256=XCFsnf0NUMU1dJpdvo_CAMyvXB-9_RCUEiy8TU50e20,7802
 isa_model/training/core/trainer.py,sha256=h5TjqjdFr0Fsv5y4-0siy1KmOlqLfliVaUXybvuoeXU,26932
 isa_model/training/core/utils.py,sha256=Nik0M2ssfNbWqP6fKO0Kfyhzr_H6Q19ioxB-qCYbn5E,8387
-isa_model-0.3.6.dist-info/METADATA,sha256=TMGcK76gGTCDWcXfCp17JuAoWxVN4TfVv5Nu-8mN8JE,12326
-isa_model-0.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-isa_model-0.3.6.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
-isa_model-0.3.6.dist-info/RECORD,,
+isa_model-0.3.8.dist-info/METADATA,sha256=rQ6RLtoPX1xoT2BFeRQZTx9gRSeCjRxuTbH3xseKbdE,12326
+isa_model-0.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+isa_model-0.3.8.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
+isa_model-0.3.8.dist-info/RECORD,,

isa_model/inference/providers/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-"""
-Providers - Components for integrating with different model providers
-File: isa_model/inference/providers/__init__.py
-This module contains provider implementations for different AI model backends.
-"""
-from .base_provider import BaseProvider
-__all__ = [
-    "BaseProvider",
-]
-# Provider implementations can be imported individually as needed
-# from .triton_provider import TritonProvider
-# from .ollama_provider import OllamaProvider
-# from .yyds_provider import YYDSProvider
-# from .openai_provider import OpenAIProvider
-# from .replicate_provider import ReplicateProvider

isa_model/inference/providers/base_provider.py DELETED Viewed

@@ -1,77 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Dict, List, Any, Optional
-import os
-import logging
-from pathlib import Path
-import dotenv
-from isa_model.inference.base import ModelType, Capability
-logger = logging.getLogger(__name__)
-class BaseProvider(ABC):
-    """Base class for all AI providers - handles API key management"""
-    def __init__(self, config: Optional[Dict[str, Any]] = None):
-        self.config = config or {}
-        self._load_environment_config()
-        self._validate_config()
-    def _load_environment_config(self):
-        """Load configuration from environment variables"""
-        # Load .env file if it exists
-        project_root = Path(__file__).parent.parent.parent.parent
-        env_path = project_root / ".env"
-        if env_path.exists():
-            dotenv.load_dotenv(env_path)
-        # Subclasses should override this to load provider-specific env vars
-        self._load_provider_env_vars()
-    @abstractmethod
-    def _load_provider_env_vars(self):
-        """Load provider-specific environment variables"""
-        pass
-    def _validate_config(self):
-        """Validate that required configuration is present"""
-        # Subclasses should override this to validate provider-specific config
-        pass
-    def get_api_key(self) -> Optional[str]:
-        """Get the API key for this provider"""
-        return self.config.get("api_key")
-    def has_valid_credentials(self) -> bool:
-        """Check if provider has valid credentials"""
-        return bool(self.get_api_key())
-    @abstractmethod
-    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
-        """Get provider capabilities by model type"""
-        pass
-    @abstractmethod
-    def get_models(self, model_type: ModelType) -> List[str]:
-        """Get available models for given type"""
-        pass
-    def get_config(self) -> Dict[str, Any]:
-        """Get provider configuration (without sensitive data)"""
-        # Return a copy without sensitive information
-        config_copy = self.config.copy()
-        if "api_key" in config_copy:
-            config_copy["api_key"] = "***" if config_copy["api_key"] else ""
-        if "api_token" in config_copy:
-            config_copy["api_token"] = "***" if config_copy["api_token"] else ""
-        return config_copy
-    def get_full_config(self) -> Dict[str, Any]:
-        """Get full provider configuration (including sensitive data) - for internal use only"""
-        return self.config.copy()
-    @abstractmethod
-    def is_reasoning_model(self, model_name: str) -> bool:
-        """Check if the model is optimized for reasoning tasks"""
-        pass

isa_model/inference/providers/ml_provider.py DELETED Viewed

@@ -1,50 +0,0 @@
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.base import ModelType, Capability
-from typing import Dict, List, Any
-import logging
-logger = logging.getLogger(__name__)
-class MLProvider(BaseProvider):
-    """Provider for traditional ML models"""
-    def __init__(self, config=None):
-        default_config = {
-            "model_directory": "./models/ml",
-            "cache_models": True,
-            "max_cache_size": 5
-        }
-        merged_config = {**default_config, **(config or {})}
-        super().__init__(config=merged_config)
-        self.name = "ml"
-        logger.info(f"Initialized MLProvider with model directory: {self.config['model_directory']}")
-    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
-        """Get provider capabilities"""
-        return {
-            ModelType.LLM: [],  # ML models are not LLMs
-            ModelType.EMBEDDING: [],
-            ModelType.VISION: [],
-            "ML": [  # Custom model type for traditional ML
-                "CLASSIFICATION",
-                "REGRESSION",
-                "CLUSTERING",
-                "FEATURE_EXTRACTION"
-            ]
-        }
-    def get_models(self, model_type: str = "ML") -> List[str]:
-        """Get available ML models"""
-        # In practice, this would scan the model directory
-        return [
-            "fraud_detection_rf",
-            "customer_churn_xgb",
-            "price_prediction_lr",
-            "recommendation_kmeans"
-        ]
-    def get_config(self) -> Dict[str, Any]:
-        """Get provider configuration"""
-        return self.config

isa_model/inference/providers/modal_provider.py DELETED Viewed

@@ -1,109 +0,0 @@
-"""
-Modal Provider
-Provider for ISA self-hosted Modal services
-No API keys needed since we deploy our own services
-"""
-import os
-import logging
-from typing import Dict, Any, Optional, List
-from .base_provider import BaseProvider
-from isa_model.inference.base import ModelType, Capability
-logger = logging.getLogger(__name__)
-class ModalProvider(BaseProvider):
-    """Provider for ISA Modal services"""
-    def __init__(self, config: Optional[Dict[str, Any]] = None):
-        super().__init__(config)
-        self.name = "modal"
-        self.base_url = "https://modal.com"  # Not used directly
-    def _load_provider_env_vars(self):
-        """Load Modal-specific environment variables"""
-        # Modal doesn't need API keys for deployed services
-        # But we can load Modal token if available
-        modal_token = os.getenv("MODAL_TOKEN_ID") or os.getenv("MODAL_TOKEN_SECRET")
-        if modal_token:
-            self.config["modal_token"] = modal_token
-        # Set default config
-        if "timeout" not in self.config:
-            self.config["timeout"] = 300
-        if "deployment_region" not in self.config:
-            self.config["deployment_region"] = "us-east-1"
-        if "gpu_type" not in self.config:
-            self.config["gpu_type"] = "T4"
-    def get_api_key(self) -> str:
-        """Modal services don't need API keys for deployed apps"""
-        return "modal-deployed-service"  # Placeholder
-    def get_base_url(self) -> str:
-        """Get base URL for Modal services"""
-        return self.base_url
-    def validate_credentials(self) -> bool:
-        """
-        Validate Modal credentials
-        For deployed services, we assume they're accessible
-        """
-        try:
-            # Check if Modal is available
-            import modal
-            return True
-        except ImportError:
-            logger.warning("Modal package not available")
-            return False
-    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
-        """Get Modal provider capabilities"""
-        return {
-            ModelType.VISION: [
-                Capability.OBJECT_DETECTION,
-                Capability.IMAGE_ANALYSIS,
-                Capability.UI_DETECTION,
-                Capability.OCR,
-                Capability.DOCUMENT_ANALYSIS
-            ]
-        }
-    def get_models(self, model_type: ModelType) -> List[str]:
-        """Get available models for given type"""
-        if model_type == ModelType.VISION:
-            return [
-                "omniparser-v2.0",
-                "table-transformer-detection",
-                "table-transformer-structure-v1.1",
-                "paddleocr-3.0",
-                "yolov8"
-            ]
-        return []
-    def is_reasoning_model(self, model_name: str) -> bool:
-        """Check if the model is optimized for reasoning tasks"""
-        # Vision models are not reasoning models
-        return False
-    def get_default_config(self) -> Dict[str, Any]:
-        """Get default configuration for Modal services"""
-        return {
-            "timeout": 300,  # 5 minutes
-            "max_retries": 3,
-            "deployment_region": "us-east-1",
-            "gpu_type": "T4"
-        }
-    def get_billing_info(self) -> Dict[str, Any]:
-        """Get billing information for Modal services"""
-        return {
-            "provider": "modal",
-            "billing_model": "compute_usage",
-            "cost_per_hour": {
-                "T4": 0.60,
-                "A100": 4.00
-            },
-            "note": "Costs depend on actual usage time, scales to zero when not in use"
-        }

isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl