isa-model 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- isa_model/core/model_manager.py +69 -4
- isa_model/inference/ai_factory.py +335 -46
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +48 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/base_llm_service.py +88 -192
- isa_model/inference/services/llm/llm_adapter.py +459 -0
- isa_model/inference/services/llm/ollama_llm_service.py +111 -185
- isa_model/inference/services/llm/openai_llm_service.py +115 -360
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +11 -3
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +233 -205
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/METADATA +1 -1
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/RECORD +26 -21
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/WHEEL +0 -0
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py

@@ -8,9 +8,9 @@ from isa_model.inference.providers.base_provider import BaseProvider
 logger = logging.getLogger(__name__)
 
 class OllamaLLMService(BaseLLMService):
-    """Ollama LLM service
+    """Ollama LLM service with unified invoke interface and proper adapter support"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.
+    def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.2:3b-instruct-fp16"):
         super().__init__(provider, model_name)
 
         # Create HTTP client for Ollama API
@@ -25,50 +25,55 @@ class OllamaLLMService(BaseLLMService):
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
         self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
 
-        # Tool binding attributes
-        self._bound_tools: List[Dict[str, Any]] = []
-        self._tool_binding_kwargs: Dict[str, Any] = {}
-        self._tool_functions: Dict[str, Callable] = {}
 
         logger.info(f"Initialized OllamaLLMService with model {model_name} at {base_url}")
 
+    def _ensure_client(self):
+        """Ensure the HTTP client is available and not closed"""
+        if not hasattr(self, 'client') or not self.client or self.client.is_closed:
+            base_url = self.config.get("base_url", "http://localhost:11434")
+            timeout = self.config.get("timeout", 60)
+            self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
+
     def _create_bound_copy(self) -> 'OllamaLLMService':
         """Create a copy of this service for tool binding"""
         bound_service = OllamaLLMService(self.provider, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
-        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
-        bound_service._tool_functions = self._tool_functions.copy()
         return bound_service
 
-    def bind_tools(self, tools: List[
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'OllamaLLMService':
         """Bind tools to this LLM service for function calling"""
         bound_service = self._create_bound_copy()
-
-        bound_service.
-
-        # Store the actual functions for execution
-        for tool in tools:
-            if callable(tool):
-                bound_service._tool_functions[tool.__name__] = tool
+        # Use base class method to bind tools
+        bound_service._bound_tools = tools
 
         return bound_service
 
-    async def ainvoke(self,
-        """
-
-
-
-
-
-
-
-
-
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """
+        Universal async invocation method that handles different input types
+
+        Args:
+            input_data: Can be:
+                - str: Simple text prompt
+                - list: Message history like [{"role": "user", "content": "hello"}]
+                - Any: LangChain message objects or other formats
+
+        Returns:
+            Model response (string for simple cases, object for complex cases)
+        """
         try:
+            # Ensure client is available
+            self._ensure_client()
+
+            # Use adapter manager to prepare messages (consistent with OpenAI service)
+            messages = self._prepare_messages(input_data)
+
+            # Prepare request parameters
             payload = {
                 "model": self.model_name,
                 "messages": messages,
-                "stream":
+                "stream": self.streaming,
                 "options": {
                     "temperature": self.config.get("temperature", 0.7),
                     "top_p": self.config.get("top_p", 0.9),
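The new `ainvoke` entry point above accepts a plain prompt string, a message list, or framework-specific message objects, and `bind_tools` now defers schema conversion to the shared adapter layer. A minimal usage sketch, assuming an `OllamaProvider` class in `isa_model/inference/providers/ollama_provider.py` that can be constructed with its default configuration (its constructor is not part of this hunk) and a running local Ollama server:

```python
# Hedged usage sketch: only OllamaLLMService's signatures appear in this diff;
# OllamaProvider's default construction is an assumption.
import asyncio

from isa_model.inference.providers.ollama_provider import OllamaProvider
from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService


def get_weather(city: str) -> str:
    """Toy tool: return a canned weather report for a city."""
    return f"It is sunny in {city}."


async def main() -> None:
    # model_name now defaults to "llama3.2:3b-instruct-fp16"
    service = OllamaLLMService(OllamaProvider())
    try:
        # 1. Simple text prompt
        print(await service.ainvoke("What is the capital of France?"))

        # 2. Message history, as documented in the ainvoke docstring
        history = [
            {"role": "system", "content": "You are terse."},
            {"role": "user", "content": "hello"},
        ]
        print(await service.ainvoke(history))

        # 3. bind_tools returns a bound copy; the original service is unchanged
        bound = service.bind_tools([get_weather])
        print(await bound.ainvoke("What's the weather in Paris?"))
    finally:
        await service.close()


asyncio.run(main())
```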
@@ -76,66 +81,96 @@ class OllamaLLMService(BaseLLMService):
                 }
             }
 
-            # Add tools if bound
-
-
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                payload["tools"] = tool_schemas
+
+            # Handle streaming
+            if self.streaming:
+                return self._stream_response(payload)
 
+            # Regular request
             response = await self.client.post("/api/chat", json=payload)
             response.raise_for_status()
             result = response.json()
 
             # Update token usage if available
             if "eval_count" in result:
-                self.
-                    "prompt_tokens": result.get("prompt_eval_count", 0),
-                    "completion_tokens": result.get("eval_count", 0),
-                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
-                }
-
-                # Update total usage
-                self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
-                self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
-                self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
-                self.total_token_usage["requests_count"] += 1
+                self._update_token_usage(result)
 
-            # Handle tool calls if present
+            # Handle tool calls if present - let adapter process the complete message
             message = result["message"]
             if "tool_calls" in message and message["tool_calls"]:
-
+                # Create message object similar to OpenAI format for adapter processing
+                message_obj = type('OllamaMessage', (), {
+                    'content': message.get("content", ""),
+                    'tool_calls': message["tool_calls"]
+                })()
+                # Pass the complete message object to adapter for proper tool_calls handling
+                return self._format_response(message_obj, input_data)
 
-
+            # Return appropriate format based on input type
+            return self._format_response(message.get("content", ""), input_data)
 
         except httpx.RequestError as e:
-            logger.error(f"HTTP request error in
+            logger.error(f"HTTP request error in ainvoke: {e}")
             raise
         except Exception as e:
             logger.error(f"Error in chat completion: {e}")
             raise
 
+    def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+        """Use adapter manager to convert messages (consistent with OpenAI service)"""
+        return self.adapter_manager.convert_messages(input_data)
+
+
+    def _format_response(self, response: Union[str, Any], original_input: Any) -> Union[str, Any]:
+        """Use adapter manager to format response (consistent with OpenAI service)"""
+        return self.adapter_manager.format_response(response, original_input)
+
+
+    async def _stream_response(self, payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        async def stream_generator():
+            try:
+                async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if line.strip():
+                            try:
+                                chunk = json.loads(line)
+                                if "message" in chunk and "content" in chunk["message"]:
+                                    content = chunk["message"]["content"]
+                                    if content:
+                                        yield content
+                            except json.JSONDecodeError:
+                                continue
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
     async def _handle_tool_calls(self, assistant_message: Dict[str, Any], original_messages: List[Dict[str, str]]) -> str:
-        """Handle tool calls from the assistant"""
+        """Handle tool calls from the assistant using adapter manager"""
         tool_calls = assistant_message.get("tool_calls", [])
 
         # Add assistant message with tool calls to conversation
         messages = original_messages + [assistant_message]
 
-        # Execute each tool call
+        # Execute each tool call using adapter manager
         for tool_call in tool_calls:
             function_name = tool_call["function"]["name"]
-            arguments = tool_call["function"]["arguments"]
 
             try:
                 # Parse arguments if they're a string
+                arguments = tool_call["function"]["arguments"]
                 if isinstance(arguments, str):
                     arguments = json.loads(arguments)
 
-                #
-
-                result = self._tool_functions[function_name](**arguments)
-                if hasattr(result, '__await__'): # Handle async functions
-                    result = await result
-                else:
-                    result = f"Error: Function {function_name} not found"
+                # Use adapter manager to execute tool
+                result = await self._execute_tool_call(function_name, arguments)
 
                 # Add tool result to messages
                 messages.append({
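The added `_stream_response` wraps Ollama's newline-delimited JSON streaming in an async generator. A standalone sketch of the same protocol with a bare `httpx` client, assuming a local Ollama server on the default port (the model name is only an example):

```python
# Standalone sketch of what _stream_response does against the Ollama API:
# POST /api/chat with "stream" enabled, read NDJSON lines, and pull text out
# of chunk["message"]["content"]. Assumes Ollama is listening on localhost:11434.
import asyncio
import json

import httpx


async def stream_chat(prompt: str, model: str = "llama3.2:3b-instruct-fp16") -> None:
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
        "options": {"temperature": 0.7, "top_p": 0.9},
    }
    async with httpx.AsyncClient(base_url="http://localhost:11434", timeout=60) as client:
        async with client.stream("POST", "/api/chat", json=payload) as response:
            response.raise_for_status()
            async for line in response.aiter_lines():
                if not line.strip():
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue  # skip malformed lines, as the service does
                content = chunk.get("message", {}).get("content", "")
                if content:
                    print(content, end="", flush=True)
    print()


asyncio.run(stream_chat("Explain asyncio in one paragraph."))
```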
@@ -153,128 +188,21 @@ class OllamaLLMService(BaseLLMService):
                 })
 
         # Get final response from the model
-        return await self.
+        return await self.ainvoke(messages)
 
-
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-            response = await self.client.post("/api/generate", json=payload)
-            response.raise_for_status()
-            result = response.json()
-
-            # Update token usage if available
-            if "eval_count" in result:
-                self.last_token_usage = {
-                    "prompt_tokens": result.get("prompt_eval_count", 0),
-                    "completion_tokens": result.get("eval_count", 0),
-                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
-                }
-
-                # Update total usage
-                self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
-                self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
-                self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
-                self.total_token_usage["requests_count"] += 1
-
-            return result["response"]
-
-        except httpx.RequestError as e:
-            logger.error(f"HTTP request error in text completion: {e}")
-            raise
-        except Exception as e:
-            logger.error(f"Error in text completion: {e}")
-            raise
-
-    async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[str]:
-        """Generate multiple completions"""
-        results = []
-        for _ in range(n):
-            result = await self.achat(messages)
-            results.append(result)
-        return results
-
-    async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
-        """Stream chat responses"""
-        try:
-            payload = {
-                "model": self.model_name,
-                "messages": messages,
-                "stream": True,
-                "options": {
-                    "temperature": self.config.get("temperature", 0.7),
-                    "top_p": self.config.get("top_p", 0.9),
-                    "num_predict": self.config.get("max_tokens", 2048)
-                }
-            }
-
-            # Add tools if bound
-            if self._has_bound_tools():
-                payload["tools"] = self._get_bound_tools()
-
-            async with self.client.stream("POST", "/api/chat", json=payload) as response:
-                response.raise_for_status()
-                async for line in response.aiter_lines():
-                    if line.strip():
-                        try:
-                            chunk = json.loads(line)
-                            if "message" in chunk and "content" in chunk["message"]:
-                                content = chunk["message"]["content"]
-                                if content:
-                                    yield content
-                        except json.JSONDecodeError:
-                            continue
-
-        except httpx.RequestError as e:
-            logger.error(f"HTTP request error in stream chat: {e}")
-            raise
-        except Exception as e:
-            logger.error(f"Error in stream chat: {e}")
-            raise
-
-    async def astream_completion(self, prompt: str) -> AsyncGenerator[str, None]:
-        """Stream completion responses"""
-        try:
-            payload = {
-                "model": self.model_name,
-                "prompt": prompt,
-                "stream": True,
-                "options": {
-                    "temperature": self.config.get("temperature", 0.7),
-                    "top_p": self.config.get("top_p", 0.9),
-                    "num_predict": self.config.get("max_tokens", 2048)
-                }
-            }
-
-            async with self.client.stream("POST", "/api/generate", json=payload) as response:
-                response.raise_for_status()
-                async for line in response.aiter_lines():
-                    if line.strip():
-                        try:
-                            chunk = json.loads(line)
-                            if "response" in chunk:
-                                content = chunk["response"]
-                                if content:
-                                    yield content
-                        except json.JSONDecodeError:
-                            continue
-
-        except httpx.RequestError as e:
-            logger.error(f"HTTP request error in stream completion: {e}")
-            raise
-        except Exception as e:
-            logger.error(f"Error in stream completion: {e}")
-            raise
+    def _update_token_usage(self, result: Dict[str, Any]):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": result.get("prompt_eval_count", 0),
+            "completion_tokens": result.get("eval_count", 0),
+            "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
 
     def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
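The per-request and cumulative token accounting removed from the old chat/completion paths is now centralised in `_update_token_usage`, keyed off Ollama's `prompt_eval_count` and `eval_count` fields. A standalone sketch of the same arithmetic on illustrative numbers:

```python
# Mirror of the service's bookkeeping: snapshot the last call, then accumulate.
# The response dicts below are fabricated examples, not real Ollama output.
from typing import Any, Dict

last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}


def update_token_usage(result: Dict[str, Any]) -> None:
    """Record the last request's usage and add it to the running totals."""
    last_token_usage.update({
        "prompt_tokens": result.get("prompt_eval_count", 0),
        "completion_tokens": result.get("eval_count", 0),
        "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
    })
    total_token_usage["prompt_tokens"] += last_token_usage["prompt_tokens"]
    total_token_usage["completion_tokens"] += last_token_usage["completion_tokens"]
    total_token_usage["total_tokens"] += last_token_usage["total_tokens"]
    total_token_usage["requests_count"] += 1


# Two sample responses: 12 + 30 = 42 tokens, then 8 + 20 = 28 tokens.
update_token_usage({"prompt_eval_count": 12, "eval_count": 30})
update_token_usage({"prompt_eval_count": 8, "eval_count": 20})
print(last_token_usage)   # {'prompt_tokens': 8, 'completion_tokens': 20, 'total_tokens': 28}
print(total_token_usage)  # totals: 20 prompt, 50 completion, 70 total, 2 requests
```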
@@ -294,14 +222,12 @@ class OllamaLLMService(BaseLLMService):
             "provider": "ollama"
         }
 
-    def _has_bound_tools(self) -> bool:
-        """Check if this service has bound tools"""
-        return bool(self._bound_tools)
-
-    def _get_bound_tools(self) -> List[Dict[str, Any]]:
-        """Get the bound tools schema"""
-        return self._bound_tools
 
     async def close(self):
         """Close the HTTP client"""
-
+        if hasattr(self, 'client') and self.client:
+            try:
+                if not self.client.is_closed:
+                    await self.client.aclose()
+            except Exception as e:
+                logger.warning(f"Error closing Ollama client: {e}")