isa-model 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/core/model_manager.py +69 -4
- isa_model/inference/ai_factory.py +335 -46
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +48 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/base_llm_service.py +88 -192
- isa_model/inference/services/llm/llm_adapter.py +459 -0
- isa_model/inference/services/llm/ollama_llm_service.py +111 -185
- isa_model/inference/services/llm/openai_llm_service.py +115 -360
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +11 -3
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +233 -205
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/METADATA +1 -1
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/RECORD +26 -21
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/WHEEL +0 -0
- {isa_model-0.3.0.dist-info → isa_model-0.3.2.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/openai_llm_service.py
@@ -3,415 +3,177 @@ import os
 import json
 from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 
-# Use the official OpenAI client
+# Use the official OpenAI client library
 from openai import AsyncOpenAI
-from dotenv import load_dotenv
 
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
-
-# Load environment variables from .env.local
-load_dotenv(dotenv_path='.env.local')
+from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
-    """OpenAI LLM service implementation"""
+    """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-
+    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
         super().__init__(provider, model_name)
 
-        #
+        # Get full configuration from provider (including sensitive data)
+        provider_config = provider.get_full_config()
+
+        # Initialize AsyncOpenAI client with provider configuration
         try:
-
-
+            if not provider_config.get("api_key"):
+                raise ValueError("OpenAI API key not found in provider configuration")
 
             self.client = AsyncOpenAI(
-                api_key=api_key,
-                base_url=base_url
+                api_key=provider_config["api_key"],
+                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                organization=provider_config.get("organization")
            )
-
-            logger.
-
+
+            logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize OpenAI client: {e}")
+            raise ValueError(f"Failed to initialize OpenAI client. Check your API key configuration: {e}") from e
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
         self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
 
-        # Tool binding attributes
-        self._bound_tools: List[Dict[str, Any]] = []
-        self._tool_binding_kwargs: Dict[str, Any] = {}
-        self._tool_functions: Dict[str, Callable] = {}
-
-        logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
 
     def _create_bound_copy(self) -> 'OpenAILLMService':
         """Create a copy of this service for tool binding"""
         bound_service = OpenAILLMService(self.provider, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
-        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
-        bound_service._tool_functions = self._tool_functions.copy()
         return bound_service
 
-    def bind_tools(self, tools: List[
-        """
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'OpenAILLMService':
+        """
+        Bind tools to this LLM service for function calling
+
+        Args:
+            tools: List of tools (functions, dicts, or LangChain tools)
+            **kwargs: Additional arguments for tool binding
+
+        Returns:
+            New LLM service instance with tools bound
+        """
+        # Create a copy of this service
         bound_service = self._create_bound_copy()
-        bound_service._bound_tools = self._convert_tools_to_schema(tools)
-        bound_service._tool_binding_kwargs = kwargs
 
-        #
-
-        if callable(tool):
-            bound_service._tool_functions[tool.__name__] = tool
+        # Use base class method to bind tools
+        bound_service._bound_tools = tools
 
         return bound_service
 
-    async def ainvoke(self,
-        """
-        if isinstance(prompt, str):
-            return await self.acompletion(prompt)
-        elif isinstance(prompt, list):
-            if not prompt:
-                raise ValueError("Empty message list provided")
-
-            # Check whether these are LangGraph message objects
-            first_msg = prompt[0]
-            if hasattr(first_msg, 'content') and hasattr(first_msg, 'type'):
-                # Convert LangGraph message objects to the standard format
-                converted_messages = []
-                for msg in prompt:
-                    if hasattr(msg, 'type') and hasattr(msg, 'content'):
-                        # LangGraph message object
-                        msg_dict = {"content": msg.content}
-
-                        # Set the role based on the message type
-                        if msg.type == "system":
-                            msg_dict["role"] = "system"
-                        elif msg.type == "human":
-                            msg_dict["role"] = "user"
-                        elif msg.type == "ai":
-                            msg_dict["role"] = "assistant"
-                            # Handle tool calls
-                            if hasattr(msg, 'tool_calls') and msg.tool_calls:
-                                msg_dict["tool_calls"] = [
-                                    {
-                                        "id": tc.get("id", f"call_{i}"),
-                                        "type": "function",
-                                        "function": {
-                                            "name": tc["name"],
-                                            "arguments": json.dumps(tc["args"])
-                                        }
-                                    } for i, tc in enumerate(msg.tool_calls)
-                                ]
-                        elif msg.type == "tool":
-                            msg_dict["role"] = "tool"
-                            if hasattr(msg, 'tool_call_id'):
-                                msg_dict["tool_call_id"] = msg.tool_call_id
-                        else:
-                            msg_dict["role"] = "user"  # Default to a user message
-
-                        converted_messages.append(msg_dict)
-                    elif isinstance(msg, dict):
-                        # Already in dict format
-                        converted_messages.append(msg)
-                    else:
-                        # Handle other types (e.g. strings)
-                        converted_messages.append({"role": "user", "content": str(msg)})
-
-                # If tools are bound, return an AIMessage object for LangGraph compatibility
-                if self._has_bound_tools():
-                    return await self.achat_with_message_response(converted_messages)
-                else:
-                    return await self.achat(converted_messages)
-            elif isinstance(first_msg, dict):
-                # Messages already in the standard dict format
-                if self._has_bound_tools():
-                    return await self.achat_with_message_response(prompt)
-                else:
-                    return await self.achat(prompt)
-            else:
-                # Handle other formats, e.g. lists of strings
-                converted_messages = []
-                for msg in prompt:
-                    if isinstance(msg, str):
-                        converted_messages.append({"role": "user", "content": msg})
-                    elif isinstance(msg, dict):
-                        converted_messages.append(msg)
-                    else:
-                        converted_messages.append({"role": "user", "content": str(msg)})
-
-                if self._has_bound_tools():
-                    return await self.achat_with_message_response(converted_messages)
-                else:
-                    return await self.achat(converted_messages)
-        else:
-            raise ValueError("Prompt must be a string or a list of messages")
-
-    async def achat(self, messages: List[Dict[str, str]]) -> str:
-        """Chat completion method"""
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """Unified invoke method for all input types"""
         try:
-
-
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
 
+            # Prepare request kwargs
             kwargs = {
                 "model": self.model_name,
                 "messages": messages,
-                "temperature": temperature,
-                "max_tokens": max_tokens
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
             }
 
-            # Add tools if bound
-
-
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
                 kwargs["tool_choice"] = "auto"
 
-
-
-
-
-
-
-
-            }
+            # Handle streaming vs non-streaming
+            if self.streaming:
+                # Streaming mode - collect all chunks
+                content_chunks = []
+                async for chunk in await self._stream_response(kwargs):
+                    content_chunks.append(chunk)
+                content = "".join(content_chunks)
 
-            #
-
-
-
-
-
-            # Handle tool calls if present
-            message = response.choices[0].message
-            if message.tool_calls:
-                return await self._handle_tool_calls(message, messages)
-
-            return message.content or ""
-
-        except Exception as e:
-            logger.error(f"Error in chat completion: {e}")
-            raise
-
-    async def achat_with_message_response(self, messages: List[Dict[str, str]]) -> Any:
-        """Chat completion method that returns message object for LangGraph compatibility"""
-        try:
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
-
-            kwargs = {
-                "model": self.model_name,
-                "messages": messages,
-                "temperature": temperature,
-                "max_tokens": max_tokens
-            }
-
-            # Add tools if bound
-            if self._has_bound_tools():
-                kwargs["tools"] = self._get_bound_tools()
-                kwargs["tool_choice"] = "auto"
-
-            response = await self.client.chat.completions.create(**kwargs)
-
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
+                # Create a mock usage object for tracking
+                class MockUsage:
+                    def __init__(self):
+                        self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                        self.completion_tokens = len(content) // 4  # Rough estimate
+                        self.total_tokens = self.prompt_tokens + self.completion_tokens
 
-
-                self.
-                self.
-                self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
-                self.total_token_usage["requests_count"] += 1
-
-            # Create a LangGraph-compatible message object
-            message = response.choices[0].message
-
-            # Try to import LangGraph message classes
-            try:
-                from langchain_core.messages import AIMessage
+                usage = MockUsage()
+                self._update_token_usage(usage)
+                self._track_billing(usage)
 
-
-
-
-
-
-                        "name": tc.function.name,
-                        "args": json.loads(tc.function.arguments),
-                        "id": tc.id
-                    })
-
-                    return AIMessage(
-                        content=message.content or "",
-                        tool_calls=tool_calls
-                    )
-                else:
-                    return AIMessage(content=message.content or "")
-
-            except ImportError:
-                # Fallback: create a simple object with content attribute
-                class SimpleMessage:
-                    def __init__(self, content, tool_calls=None):
-                        self.content = content
-                        self.tool_calls = tool_calls or []
+                return self._format_response(content, input_data)
+            else:
+                # Non-streaming mode
+                response = await self.client.chat.completions.create(**kwargs)
+                message = response.choices[0].message
 
+                # Update usage tracking
+                if response.usage:
+                    self._update_token_usage(response.usage)
+                    self._track_billing(response.usage)
+
+                # Handle tool calls if present - let adapter process the complete message
                 if message.tool_calls:
-                    tool_calls
-
-
-
-
-                        "id": tc.id
-                    })
-                    return SimpleMessage(message.content or "", tool_calls)
-                else:
-                    return SimpleMessage(message.content or "")
+                    # Pass the complete message object to adapter for proper tool_calls handling
+                    return self._format_response(message, input_data)
+
+                # Return appropriate format based on input type
+                return self._format_response(message.content or "", input_data)
 
         except Exception as e:
-            logger.error(f"Error in
+            logger.error(f"Error in ainvoke: {e}")
             raise
 
-
-
-
-
-            "role": "assistant",
-            "content": assistant_message.content or "",
-            "tool_calls": [
-                {
-                    "id": tc.id,
-                    "type": tc.type,
-                    "function": {
-                        "name": tc.function.name,
-                        "arguments": tc.function.arguments
-                    }
-                } for tc in assistant_message.tool_calls
-            ]
-        }]
+
+    async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        kwargs["stream"] = True
 
-
-        for tool_call in assistant_message.tool_calls:
-            function_name = tool_call.function.name
-            arguments = json.loads(tool_call.function.arguments)
-
+        async def stream_generator():
             try:
-
-
-
-                if
-
-                else:
-                    result = f"Error: Function {function_name} not found"
-
-                # Add tool result to messages
-                messages.append({
-                    "role": "tool",
-                    "content": str(result),
-                    "tool_call_id": tool_call.id
-                })
-
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
             except Exception as e:
-                logger.error(f"Error
-
-                    "role": "tool",
-                    "content": f"Error executing {function_name}: {str(e)}",
-                    "tool_call_id": tool_call.id
-                })
+                logger.error(f"Error in streaming: {e}")
+                raise
 
-
-        try:
-            kwargs = {
-                "model": self.model_name,
-                "messages": messages,
-                "temperature": self.config.get("temperature", 0.7),
-                "max_tokens": self.config.get("max_tokens", 1024)
-            }
-
-            response = await self.client.chat.completions.create(**kwargs)
-            return response.choices[0].message.content or ""
-
-        except Exception as e:
-            logger.error(f"Error getting final response after tool calls: {e}")
-            raise
-
-    async def acompletion(self, prompt: str) -> str:
-        """Text completion method (using chat API)"""
-        messages = [{"role": "user", "content": prompt}]
-        return await self.achat(messages)
+        return stream_generator()
 
-    async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[str]:
-        """Generate multiple completions"""
-        try:
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
-
-            kwargs = {
-                "model": self.model_name,
-                "messages": messages,
-                "temperature": temperature,
-                "max_tokens": max_tokens,
-                "n": n
-            }
-
-            # Add tools if bound
-            if self._has_bound_tools():
-                kwargs["tools"] = self._get_bound_tools()
-                kwargs["tool_choice"] = "auto"
-
-            response = await self.client.chat.completions.create(**kwargs)
-
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
-
-            # Update total usage
-            self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
-            self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
-            self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
-            self.total_token_usage["requests_count"] += 1
-
-            return [choice.message.content or "" for choice in response.choices]
-        except Exception as e:
-            logger.error(f"Error in generate: {e}")
-            raise
 
-
-        """
-
-
-
-
-
-
-
-
-
-
+    def _update_token_usage(self, usage):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": usage.prompt_tokens,
+            "completion_tokens": usage.completion_tokens,
+            "total_tokens": usage.total_tokens
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    def _track_billing(self, usage):
+        """Track billing information"""
+        self._track_usage(
+            service_type=ServiceType.LLM,
+            operation="chat",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
             }
-
-        # Add tools if bound
-        if self._has_bound_tools():
-            kwargs["tools"] = self._get_bound_tools()
-            kwargs["tool_choice"] = "auto"
-
-        stream = await self.client.chat.completions.create(**kwargs)
-
-        async for chunk in stream:
-            content = chunk.choices[0].delta.content
-            if content:
-                yield content
-
-        except Exception as e:
-            logger.error(f"Error in stream chat: {e}")
-            raise
-
-    async def astream_completion(self, prompt: str) -> AsyncGenerator[str, None]:
-        """Stream completion responses"""
-        messages = [{"role": "user", "content": prompt}]
-        async for chunk in self.astream_chat(messages):
-            yield chunk
+        )
 
     def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
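The net effect of this hunk: the old `ainvoke`/`achat`/`achat_with_message_response`/`acompletion`/`agenerate` family collapses into a single `ainvoke` that accepts a plain string, an OpenAI-style message list, or LangGraph-style message objects, with conversion delegated to base-class helpers (`_prepare_messages`, `_prepare_tools_for_request`, `_format_response`). A minimal caller-side sketch, assuming an already-configured `provider` object (in 0.3.2 services are normally obtained through `ai_factory`, whose details are outside this hunk):

```python
import asyncio

async def main():
    # `provider` is assumed to be a configured OpenAI provider instance.
    service = OpenAILLMService(provider, model_name="gpt-4.1-nano")

    # A bare string and an OpenAI-style message list now take the same path.
    one_liner = await service.ainvoke("Summarize this diff in one sentence.")
    reply = await service.ainvoke([
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "What does _track_billing record?"},
    ])
    print(one_liner, reply)
    await service.close()

asyncio.run(main())
```

Note the `MockUsage` shim in the streaming branch: streamed chat completions do not return a `usage` payload, so the service falls back to a rough four-characters-per-token estimate, which means billed token counts for streamed calls are approximate.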
@@ -431,13 +193,6 @@ class OpenAILLMService(BaseLLMService):
             "provider": "openai"
         }
 
-    def _has_bound_tools(self) -> bool:
-        """Check if this service has bound tools"""
-        return bool(self._bound_tools)
-
-    def _get_bound_tools(self) -> List[Dict[str, Any]]:
-        """Get the bound tools schema"""
-        return self._bound_tools
 
     async def close(self):
         """Close the backend client"""
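With these helpers deleted, tool state lives entirely in `_bound_tools` and schema conversion happens in the shared base class at request time. A hedged sketch of the new binding flow (the tool function and prompt are illustrative, not from the package):

```python
# Hypothetical tool; bind_tools accepts plain callables, dicts, or
# LangChain tools and returns a new bound copy of the service.
def get_weather(city: str) -> str:
    """Toy tool: return a canned forecast for `city`."""
    return f"Sunny in {city}"

async def ask_weather(service: OpenAILLMService) -> str:
    bound = service.bind_tools([get_weather])
    # The base-class adapter converts the callable to an OpenAI tool
    # schema via _prepare_tools_for_request() before the API call.
    return await bound.ainvoke("What's the weather in Paris?")
```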
isa_model/inference/services/vision/helpers/image_utils.py
@@ -2,9 +2,10 @@ from io import BytesIO
 from PIL import Image
 from typing import Union
 import base64
-from app.config.config_manager import config_manager
+# from app.config.config_manager import config_manager  # Commented out to fix import
+import logging
 
-logger =
+logger = logging.getLogger(__name__)
 
 def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     """Compress an image to reduce its size
@@ -30,7 +31,7 @@ def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     # Compute the new size, preserving the aspect ratio
     ratio = max_size / max(img.size)
     if ratio < 1:
-        new_size =
+        new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
         img = img.resize(new_size, Image.Resampling.LANCZOS)
 
     # Save the compressed image
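This hunk restores the aspect-ratio-preserving downscale inside `compress_image`: the longest edge is capped at `max_size` and both dimensions are scaled by the same ratio. A standalone sketch of the same logic; the final `convert`/`save` step is an assumption, since the hunk ends before the save call:

```python
from io import BytesIO
from PIL import Image

def shrink_to_max(image_bytes: bytes, max_size: int = 1024) -> bytes:
    """Downscale so the longest edge is at most max_size, keeping aspect ratio."""
    img = Image.open(BytesIO(image_bytes))
    ratio = max_size / max(img.size)
    if ratio < 1:  # only shrink, never enlarge
        new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
        img = img.resize(new_size, Image.Resampling.LANCZOS)
    buf = BytesIO()
    img.convert("RGB").save(buf, format="JPEG", quality=85)  # assumed save step
    return buf.getvalue()
```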
isa_model/inference/services/vision/ollama_vision_service.py
@@ -7,6 +7,7 @@ from tenacity import retry, stop_after_attempt, wait_exponential
 from isa_model.inference.services.vision.base_vision_service import BaseVisionService
 from isa_model.inference.providers.base_provider import BaseProvider
 import logging
+import requests
 
 logger = logging.getLogger(__name__)
 
@@ -19,10 +20,17 @@ class OllamaVisionService(BaseVisionService):
         self.temperature = self.config.get('temperature', 0.7)
 
     def _get_image_data(self, image: Union[str, BinaryIO]) -> bytes:
-        """
+        """Get image data, supporting both local files and URLs"""
         if isinstance(image, str):
-
-
+            # Check if it's a URL
+            if image.startswith(('http://', 'https://')):
+                response = requests.get(image)
+                response.raise_for_status()
+                return response.content
+            else:
+                # Local file path
+                with open(image, 'rb') as f:
+                    return f.read()
         else:
             return image.read()
 
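The new `_get_image_data` dispatches on the string prefix: http(s) strings are fetched with `requests`, anything else is treated as a local path. A standalone sketch of the same dispatch; the `timeout` is an addition not present in the diff (the service calls `requests.get(image)` unbounded, and because the call is synchronous it will also block the event loop of an async caller):

```python
import requests

def load_image_bytes(image: str) -> bytes:
    """Fetch bytes from an http(s) URL or read them from a local file path."""
    if image.startswith(("http://", "https://")):
        response = requests.get(image, timeout=30)  # timeout added here
        response.raise_for_status()
        return response.content
    with open(image, "rb") as f:
        return f.read()
```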