PyPI - isa-model - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

isa-model 0.3.6py3-none-any.whl → 0.3.8py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

isa_model/client.py +200 -6
isa_model/deployment/services/auto_deploy_vision_service.py +4 -3
isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
isa_model/inference/ai_factory.py +83 -3
isa_model/serving/api/routes/unified.py +72 -0
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/METADATA +1 -1
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/RECORD +9 -18
isa_model/inference/providers/__init__.py +0 -19
isa_model/inference/providers/base_provider.py +0 -77
isa_model/inference/providers/ml_provider.py +0 -50
isa_model/inference/providers/modal_provider.py +0 -109
isa_model/inference/providers/model_cache_manager.py +0 -341
isa_model/inference/providers/ollama_provider.py +0 -92
isa_model/inference/providers/openai_provider.py +0 -130
isa_model/inference/providers/replicate_provider.py +0 -119
isa_model/inference/providers/triton_provider.py +0 -439
isa_model/inference/providers/yyds_provider.py +0 -108
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/WHEEL +0 -0
{isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/top_level.txt +0 -0

isa_model/client.py CHANGED Viewed

@@ -89,6 +89,47 @@ class ISAModelClient:
         logger.info("ISA Model Client initialized")
+    async def stream(
+        self,
+        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        task: str,
+        service_type: str,
+        model_hint: Optional[str] = None,
+        provider_hint: Optional[str] = None,
+        **kwargs
+    ):
+        """
+        Streaming invoke method that yields tokens in real-time
+        Args:
+            input_data: Input data (text for LLM streaming)
+            task: Task to perform
+            service_type: Type of service (only "text" supports streaming)
+            model_hint: Optional model preference
+            provider_hint: Optional provider preference
+            **kwargs: Additional parameters
+        Yields:
+            Individual tokens as they arrive from the model
+        Example:
+            async for token in client.stream("Hello world", "chat", "text"):
+                print(token, end="", flush=True)
+        """
+        if service_type != "text":
+            raise ValueError("Streaming is only supported for text/LLM services")
+        try:
+            if self.mode == "api":
+                async for token in self._stream_api(input_data, task, service_type, model_hint, provider_hint, **kwargs):
+                    yield token
+            else:
+                async for token in self._stream_local(input_data, task, service_type, model_hint, provider_hint, **kwargs):
+                    yield token
+        except Exception as e:
+            logger.error(f"Failed to stream {task} on {service_type}: {e}")
+            raise
     async def invoke(
         self,
         input_data: Union[str, bytes, Path, Dict[str, Any]],
@@ -96,8 +137,10 @@ class ISAModelClient:
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
+        stream: bool = False,
+        tools: Optional[List[Any]] = None,
         **kwargs
-    ) -> Dict[str, Any]:
+    ) -> Union[Dict[str, Any], object]:
         """
         Unified invoke method with intelligent model selection
@@ -107,10 +150,13 @@ class ISAModelClient:
             service_type: Type of service (vision, audio, text, image, embedding)
             model_hint: Optional model preference
             provider_hint: Optional provider preference
+            stream: Enable streaming for text services (returns AsyncGenerator)
+            tools: Optional list of tools for function calling (only for text services)
             **kwargs: Additional task-specific parameters
         Returns:
-            Unified response dictionary with result and metadata
+            If stream=False: Unified response dictionary with result and metadata
+            If stream=True: AsyncGenerator yielding tokens (only for text services)
         Examples:
             # Vision tasks
@@ -126,6 +172,17 @@ class ISAModelClient:
             await client.invoke("Translate this text", "translate", "text")
             await client.invoke("What is AI?", "chat", "text")
+            # Streaming text
+            async for token in await client.invoke("Hello", "chat", "text", stream=True):
+                print(token, end="", flush=True)
+            # Text with tools
+            await client.invoke("What's 5+3?", "chat", "text", tools=[calculator_function])
+            # Streaming with tools
+            async for token in await client.invoke("What's 5+3?", "chat", "text", stream=True, tools=[calculator_function]):
+                print(token, end="")
             # Image generation
             await client.invoke("A beautiful sunset", "generate_image", "image")
@@ -133,7 +190,33 @@ class ISAModelClient:
             await client.invoke("Text to embed", "create_embedding", "embedding")
         """
         try:
-            # Route to appropriate mode
+            # Handle streaming case
+            if stream:
+                if service_type != "text":
+                    raise ValueError("Streaming is only supported for text services")
+                if self.mode == "api":
+                    return self._stream_api(
+                        input_data=input_data,
+                        task=task,
+                        service_type=service_type,
+                        model_hint=model_hint,
+                        provider_hint=provider_hint,
+                        tools=tools,
+                        **kwargs
+                    )
+                else:
+                    return self._stream_local(
+                        input_data=input_data,
+                        task=task,
+                        service_type=service_type,
+                        model_hint=model_hint,
+                        provider_hint=provider_hint,
+                        tools=tools,
+                        **kwargs
+                    )
+            # Route to appropriate mode for non-streaming
             if self.mode == "api":
                 return await self._invoke_api(
                     input_data=input_data,
@@ -141,6 +224,7 @@ class ISAModelClient:
                     service_type=service_type,
                     model_hint=model_hint,
                     provider_hint=provider_hint,
+                    tools=tools,
                     **kwargs
                 )
             else:
@@ -150,6 +234,7 @@ class ISAModelClient:
                     service_type=service_type,
                     model_hint=model_hint,
                     provider_hint=provider_hint,
+                    tools=tools,
                     **kwargs
                 )
@@ -277,7 +362,8 @@ class ISAModelClient:
         service_type: str,
         model_name: str,
         provider: str,
-        task: str
+        task: str,
+        tools: Optional[List[Any]] = None
     ) -> Any:
         """Get appropriate service instance"""
@@ -285,7 +371,11 @@ class ISAModelClient:
         # Check cache first
         if cache_key in self._service_cache:
-            return self._service_cache[cache_key]
+            service = self._service_cache[cache_key]
+            # If tools are needed, bind them to the service
+            if tools and service_type == "text":
+                return service.bind_tools(tools)
+            return service
         try:
             # Route to appropriate AIFactory method
@@ -315,6 +405,11 @@ class ISAModelClient:
             # Cache the service
             self._service_cache[cache_key] = service
+            # If tools are needed, bind them to the service
+            if tools and service_type == "text":
+                return service.bind_tools(tools)
             return service
         except Exception as e:
@@ -544,6 +639,7 @@ class ISAModelClient:
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
+        tools: Optional[List[Any]] = None,
         **kwargs
     ) -> Dict[str, Any]:
         """Local invoke using AI Factory (original logic)"""
@@ -562,7 +658,8 @@ class ISAModelClient:
                 service_type=service_type,
                 model_name=selected_model["model_id"],
                 provider=selected_model["provider"],
-                task=task
+                task=task,
+                tools=tools
             )
             # Step 3: Execute task with unified interface
@@ -744,6 +841,103 @@ class ISAModelClient:
                 logger.error(f"API binary upload failed: {e}")
                 raise
+    async def _stream_local(
+        self,
+        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        task: str,
+        service_type: str,
+        model_hint: Optional[str] = None,
+        provider_hint: Optional[str] = None,
+        tools: Optional[List[Any]] = None,
+        **kwargs
+    ):
+        """Local streaming using AI Factory"""
+        # Step 1: Select best model for this task
+        selected_model = await self._select_model(
+            input_data=input_data,
+            task=task,
+            service_type=service_type,
+            model_hint=model_hint,
+            provider_hint=provider_hint
+        )
+        # Step 2: Get appropriate service
+        service = await self._get_service(
+            service_type=service_type,
+            model_name=selected_model["model_id"],
+            provider=selected_model["provider"],
+            task=task,
+            tools=tools
+        )
+        # Step 3: Yield tokens from the stream
+        async for token in service.astream(input_data):
+            yield token
+    async def _stream_api(
+        self,
+        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        task: str,
+        service_type: str,
+        model_hint: Optional[str] = None,
+        provider_hint: Optional[str] = None,
+        **kwargs
+    ):
+        """API streaming using Server-Sent Events (SSE)"""
+        # Only support text streaming for now
+        if not isinstance(input_data, (str, dict)):
+            raise ValueError("API streaming only supports text input")
+        payload = {
+            "input_data": input_data,
+            "task": task,
+            "service_type": service_type,
+            "model_hint": model_hint,
+            "provider_hint": provider_hint,
+            "stream": True,
+            "parameters": kwargs
+        }
+        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
+            try:
+                async with session.post(
+                    f"{self.api_url}/api/v1/stream",
+                    json=payload,
+                    headers=self.headers
+                ) as response:
+                    if response.status == 200:
+                        # Parse SSE stream
+                        async for line in response.content:
+                            if line:
+                                line_str = line.decode().strip()
+                                if line_str.startswith("data: "):
+                                    try:
+                                        # Parse SSE data
+                                        import json
+                                        json_str = line_str[6:]  # Remove "data: " prefix
+                                        data = json.loads(json_str)
+                                        if data.get("type") == "token" and "token" in data:
+                                            yield data["token"]
+                                        elif data.get("type") == "completion":
+                                            # End of stream
+                                            break
+                                        elif data.get("type") == "error":
+                                            raise Exception(f"Server error: {data.get('error')}")
+                                    except json.JSONDecodeError:
+                                        # Skip malformed lines
+                                        continue
+                    else:
+                        error_data = await response.text()
+                        raise Exception(f"API streaming error {response.status}: {error_data}")
+            except Exception as e:
+                logger.error(f"API streaming failed: {e}")
+                raise
 # Convenience function for quick access
 def create_client(

isa_model/deployment/services/auto_deploy_vision_service.py CHANGED Viewed

@@ -19,10 +19,11 @@ class AutoDeployVisionService(BaseVisionService):
     of Modal services for ISA vision tasks.
     """
-    def __init__(self, provider_name: str = "modal", model_name: str = "qwen_table", **kwargs):
-        # Use centralized architecture
-        super().__init__(provider_name, model_name, **kwargs)
+    def __init__(self, model_name: str = "isa_vision_table", config: dict = None, **kwargs):
+        # Initialize BaseVisionService with modal provider
+        super().__init__("modal", model_name, **kwargs)
         self.model_name = model_name
+        self.config = config or {}
         self.underlying_service = None
         self._factory = None

isa_model/deployment/services/simple_auto_deploy_vision_service.py ADDED Viewed

@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+"""
+Simple Auto-Deploy Vision Service Wrapper
+A simplified version that avoids complex import dependencies.
+"""
+import asyncio
+import subprocess
+import logging
+import time
+from typing import Dict, Any, Optional, Union, List, BinaryIO
+from pathlib import Path
+logger = logging.getLogger(__name__)
+class SimpleAutoDeployVisionService:
+    """
+    Simplified vision service wrapper that handles automatic deployment
+    of Modal services for ISA vision tasks without complex inheritance.
+    """
+    def __init__(self, model_name: str = "isa_vision_ui", config: dict = None):
+        self.model_name = model_name
+        self.config = config or {}
+        self.underlying_service = None
+        self._factory = None
+        self._modal_deployed = False
+        logger.info(f"Initialized SimpleAutoDeployVisionService for {model_name}")
+    def _get_factory(self):
+        """Get AIFactory instance for service management"""
+        if not self._factory:
+            from isa_model.inference.ai_factory import AIFactory
+            self._factory = AIFactory()
+        return self._factory
+    async def _ensure_service_deployed(self) -> bool:
+        """Ensure the Modal service is deployed before use"""
+        if self._modal_deployed:
+            logger.info(f"Service {self.model_name} already deployed")
+            return True
+        try:
+            factory = self._get_factory()
+            # Check if service is available
+            app_name = factory._get_modal_app_name(self.model_name)
+            if not factory._check_modal_service_availability(app_name):
+                logger.info(f"Deploying {self.model_name} service...")
+                success = factory._auto_deploy_modal_service(self.model_name)
+                if not success:
+                    logger.error(f"Failed to deploy {self.model_name}")
+                    return False
+                # Wait for service to be ready
+                logger.info(f"Waiting for {self.model_name} service to be ready...")
+                await self._wait_for_service_ready(app_name)
+            # Mark as deployed
+            self._modal_deployed = True
+            # Initialize underlying service using proper factory method
+            if not self.underlying_service:
+                # Create a simple mock service for testing
+                self.underlying_service = MockModalVisionService(self.model_name)
+            return True
+        except Exception as e:
+            logger.error(f"Failed to ensure service deployment: {e}")
+            return False
+    async def _wait_for_service_ready(self, app_name: str, max_wait_time: int = 300):
+        """Wait for Modal service to be ready"""
+        logger.info(f"Waiting up to {max_wait_time} seconds for {app_name} to be ready...")
+        start_time = time.time()
+        while time.time() - start_time < max_wait_time:
+            try:
+                # Simple wait simulation
+                await asyncio.sleep(5)
+                logger.info(f"Still waiting for {app_name}... ({int(time.time() - start_time)}s elapsed)")
+                # For testing, assume service is ready after 10 seconds
+                if time.time() - start_time > 10:
+                    logger.info(f"Service {app_name} assumed ready for testing!")
+                    return
+            except Exception as e:
+                logger.debug(f"Service not ready yet: {e}")
+        logger.warning(f"Service {app_name} may not be fully ready after {max_wait_time}s")
+    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """Detect UI elements with auto-deploy"""
+        # Ensure service is deployed
+        if not await self._ensure_service_deployed():
+            return {
+                'success': False,
+                'error': f'Failed to deploy {self.model_name} service',
+                'service': self.model_name
+            }
+        try:
+            # Call the underlying service (mock for testing)
+            logger.info(f"Calling UI detection service for {self.model_name}")
+            result = await self.underlying_service.detect_ui_elements(image)
+            return result
+        except Exception as e:
+            logger.error(f"UI detection failed: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'service': self.model_name
+            }
+    async def analyze_image(
+        self,
+        image: Union[str, BinaryIO],
+        prompt: Optional[str] = None,
+        max_tokens: int = 1000
+    ) -> Dict[str, Any]:
+        """Analyze image with auto-deploy"""
+        if not await self._ensure_service_deployed():
+            return {
+                'success': False,
+                'error': f'Failed to deploy {self.model_name} service',
+                'service': self.model_name
+            }
+        try:
+            result = await self.underlying_service.analyze_image(image, prompt, max_tokens)
+            return result
+        except Exception as e:
+            logger.error(f"Image analysis failed: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'service': self.model_name
+            }
+    async def invoke(
+        self,
+        image: Union[str, BinaryIO],
+        prompt: Optional[str] = None,
+        task: Optional[str] = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """Unified invoke method for all vision operations"""
+        if not await self._ensure_service_deployed():
+            return {
+                'success': False,
+                'error': f'Failed to deploy {self.model_name} service',
+                'service': self.model_name
+            }
+        try:
+            # Route to appropriate method based on task
+            if task == "detect_ui_elements" or task == "ui_detection":
+                return await self.detect_ui_elements(image)
+            elif task == "analyze" or task is None:
+                return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
+            else:
+                return await self.underlying_service.invoke(image, prompt, task, **kwargs)
+        except Exception as e:
+            logger.error(f"Vision invoke failed: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'service': self.model_name
+            }
+    def get_supported_formats(self) -> List[str]:
+        """Get list of supported image formats"""
+        return ['jpg', 'jpeg', 'png', 'gif', 'webp']
+    def get_max_image_size(self) -> Dict[str, int]:
+        """Get maximum supported image dimensions"""
+        return {"width": 2048, "height": 2048, "file_size_mb": 10}
+    async def close(self):
+        """Cleanup resources"""
+        if self.underlying_service:
+            await self.underlying_service.close()
+        logger.info(f"Closed {self.model_name} service")
+class MockModalVisionService:
+    """Mock Modal vision service for testing"""
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+        logger.info(f"Initialized mock service for {model_name}")
+    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """Mock UI element detection"""
+        await asyncio.sleep(0.1)  # Simulate processing time
+        # Return mock UI elements based on model type
+        if "ui" in self.model_name:
+            ui_elements = [
+                {
+                    'id': 'ui_0',
+                    'type': 'button',
+                    'content': 'Search Button',
+                    'center': [400, 200],
+                    'bbox': [350, 180, 450, 220],
+                    'confidence': 0.95,
+                    'interactable': True
+                },
+                {
+                    'id': 'ui_1',
+                    'type': 'input',
+                    'content': 'Search Input',
+                    'center': [300, 150],
+                    'bbox': [200, 130, 400, 170],
+                    'confidence': 0.88,
+                    'interactable': True
+                }
+            ]
+        else:
+            ui_elements = []
+        return {
+            'success': True,
+            'service': self.model_name,
+            'ui_elements': ui_elements,
+            'element_count': len(ui_elements),
+            'processing_time': 0.1,
+            'detection_method': 'mock_omniparser',
+            'model_info': {
+                'primary': 'Mock OmniParser v2.0',
+                'gpu': 'T4',
+                'container_id': 'mock-container'
+            }
+        }
+    async def analyze_image(
+        self,
+        image: Union[str, BinaryIO],
+        prompt: Optional[str] = None,
+        max_tokens: int = 1000
+    ) -> Dict[str, Any]:
+        """Mock image analysis"""
+        await asyncio.sleep(0.1)
+        return {
+            'success': True,
+            'service': self.model_name,
+            'text': f'Mock analysis of image with prompt: {prompt}',
+            'confidence': 0.9,
+            'processing_time': 0.1
+        }
+    async def invoke(
+        self,
+        image: Union[str, BinaryIO],
+        prompt: Optional[str] = None,
+        task: Optional[str] = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """Mock invoke method"""
+        if task == "detect_ui_elements":
+            return await self.detect_ui_elements(image)
+        else:
+            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
+    async def close(self):
+        """Mock cleanup"""
+        pass

isa_model/inference/ai_factory.py CHANGED Viewed

@@ -123,9 +123,9 @@ class AIFactory:
         # Handle special ISA vision services
         if model_name in ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]:
             try:
-                from isa_model.inference.services.vision.auto_deploy_vision_service import AutoDeployVisionService
+                from isa_model.deployment.services.simple_auto_deploy_vision_service import SimpleAutoDeployVisionService
                 logger.info(f"Creating auto-deploy service wrapper for {model_name}")
-                return AutoDeployVisionService(model_name, config)
+                return SimpleAutoDeployVisionService(model_name, config)
             except Exception as e:
                 logger.error(f"Failed to create ISA vision service: {e}")
                 raise
@@ -347,4 +347,84 @@ class AIFactory:
         """Get the singleton instance"""
         if cls._instance is None:
             cls._instance = cls()
-        return cls._instance
+        return cls._instance
+    # Modal service deployment methods for AutoDeployVisionService
+    def _get_modal_app_name(self, model_name: str) -> str:
+        """Get Modal app name for a given model"""
+        app_mapping = {
+            "isa_vision_table": "qwen-vision-table",
+            "isa_vision_ui": "isa-vision-ui",
+            "isa_vision_doc": "isa-vision-doc"
+        }
+        return app_mapping.get(model_name, f"unknown-{model_name}")
+    def _check_modal_service_availability(self, app_name: str) -> bool:
+        """Check if Modal service is available and running"""
+        try:
+            import modal
+            # Try to lookup the app
+            app = modal.App.lookup(app_name)
+            return True
+        except Exception as e:
+            logger.debug(f"Modal service {app_name} not available: {e}")
+            return False
+    def _auto_deploy_modal_service(self, model_name: str) -> bool:
+        """Auto-deploy Modal service for given model"""
+        try:
+            import subprocess
+            import os
+            from pathlib import Path
+            # Get the Modal service file path
+            service_files = {
+                "isa_vision_table": "isa_vision_table_service.py",
+                "isa_vision_ui": "isa_vision_ui_service.py",
+                "isa_vision_doc": "isa_vision_doc_service.py"
+            }
+            if model_name not in service_files:
+                logger.error(f"No Modal service file found for {model_name}")
+                return False
+            # Get the service file path
+            service_file = service_files[model_name]
+            modal_dir = Path(__file__).parent.parent / "deployment" / "cloud" / "modal"
+            service_path = modal_dir / service_file
+            if not service_path.exists():
+                logger.error(f"Modal service file not found: {service_path}")
+                return False
+            logger.info(f"Deploying Modal service: {service_file}")
+            # Run modal deploy command
+            result = subprocess.run(
+                ["modal", "deploy", str(service_path)],
+                capture_output=True,
+                text=True,
+                timeout=600,  # 10 minute timeout
+                cwd=str(modal_dir)
+            )
+            if result.returncode == 0:
+                logger.info(f"Successfully deployed {model_name} Modal service")
+                return True
+            else:
+                logger.error(f"Failed to deploy {model_name}: {result.stderr}")
+                return False
+        except subprocess.TimeoutExpired:
+            logger.error(f"Deployment timeout for {model_name}")
+            return False
+        except Exception as e:
+            logger.error(f"Exception during {model_name} deployment: {e}")
+            return False
+    def _shutdown_modal_service(self, model_name: str):
+        """Shutdown Modal service (optional - Modal handles auto-scaling)"""
+        # Modal services auto-scale to zero, so explicit shutdown isn't required
+        # This method is here for compatibility with AutoDeployVisionService
+        logger.info(f"Modal service {model_name} will auto-scale to zero when idle")
+        pass

isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

isa-model 0.3.6py3-none-any.whl → 0.3.8py3-none-any.whl