isa-model 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. isa_model/config/__init__.py +9 -0
  2. isa_model/config/config_manager.py +213 -0
  3. isa_model/core/model_manager.py +5 -0
  4. isa_model/core/model_registry.py +39 -6
  5. isa_model/core/storage/supabase_storage.py +344 -0
  6. isa_model/core/vision_models_init.py +116 -0
  7. isa_model/deployment/cloud/__init__.py +9 -0
  8. isa_model/deployment/cloud/modal/__init__.py +10 -0
  9. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +612 -0
  10. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +305 -0
  11. isa_model/inference/ai_factory.py +238 -14
  12. isa_model/inference/providers/modal_provider.py +109 -0
  13. isa_model/inference/providers/yyds_provider.py +108 -0
  14. isa_model/inference/services/__init__.py +2 -1
  15. isa_model/inference/services/base_service.py +0 -38
  16. isa_model/inference/services/llm/base_llm_service.py +32 -0
  17. isa_model/inference/services/llm/llm_adapter.py +73 -3
  18. isa_model/inference/services/llm/ollama_llm_service.py +104 -3
  19. isa_model/inference/services/llm/openai_llm_service.py +67 -15
  20. isa_model/inference/services/llm/yyds_llm_service.py +254 -0
  21. isa_model/inference/services/stacked/__init__.py +26 -0
  22. isa_model/inference/services/stacked/base_stacked_service.py +269 -0
  23. isa_model/inference/services/stacked/config.py +426 -0
  24. isa_model/inference/services/stacked/doc_analysis_service.py +640 -0
  25. isa_model/inference/services/stacked/flux_professional_service.py +579 -0
  26. isa_model/inference/services/stacked/ui_analysis_service.py +1319 -0
  27. isa_model/inference/services/vision/base_image_gen_service.py +0 -34
  28. isa_model/inference/services/vision/base_vision_service.py +46 -2
  29. isa_model/inference/services/vision/isA_vision_service.py +402 -0
  30. isa_model/inference/services/vision/openai_vision_service.py +151 -9
  31. isa_model/inference/services/vision/replicate_image_gen_service.py +166 -38
  32. isa_model/inference/services/vision/replicate_vision_service.py +693 -0
  33. isa_model/serving/__init__.py +19 -0
  34. isa_model/serving/api/__init__.py +10 -0
  35. isa_model/serving/api/fastapi_server.py +84 -0
  36. isa_model/serving/api/middleware/__init__.py +9 -0
  37. isa_model/serving/api/middleware/request_logger.py +88 -0
  38. isa_model/serving/api/routes/__init__.py +5 -0
  39. isa_model/serving/api/routes/health.py +82 -0
  40. isa_model/serving/api/routes/llm.py +19 -0
  41. isa_model/serving/api/routes/ui_analysis.py +223 -0
  42. isa_model/serving/api/routes/vision.py +19 -0
  43. isa_model/serving/api/schemas/__init__.py +17 -0
  44. isa_model/serving/api/schemas/common.py +33 -0
  45. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  46. {isa_model-0.3.3.dist-info → isa_model-0.3.5.dist-info}/METADATA +1 -1
  47. {isa_model-0.3.3.dist-info → isa_model-0.3.5.dist-info}/RECORD +49 -17
  48. {isa_model-0.3.3.dist-info → isa_model-0.3.5.dist-info}/WHEEL +0 -0
  49. {isa_model-0.3.3.dist-info → isa_model-0.3.5.dist-info}/top_level.txt +0 -0
File: isa_model/inference/providers/modal_provider.py (new file)
@@ -0,0 +1,109 @@
+ """
+ Modal Provider
+ 
+ Provider for ISA self-hosted Modal services
+ No API keys needed since we deploy our own services
+ """
+ 
+ import os
+ import logging
+ from typing import Dict, Any, Optional, List
+ from .base_provider import BaseProvider
+ from isa_model.inference.base import ModelType, Capability
+ 
+ logger = logging.getLogger(__name__)
+ 
+ class ModalProvider(BaseProvider):
+     """Provider for ISA Modal services"""
+ 
+     def __init__(self, config: Optional[Dict[str, Any]] = None):
+         super().__init__(config)
+         self.name = "modal"
+         self.base_url = "https://modal.com"  # Not used directly
+ 
+     def _load_provider_env_vars(self):
+         """Load Modal-specific environment variables"""
+         # Modal doesn't need API keys for deployed services
+         # But we can load Modal token if available
+         modal_token = os.getenv("MODAL_TOKEN_ID") or os.getenv("MODAL_TOKEN_SECRET")
+         if modal_token:
+             self.config["modal_token"] = modal_token
+ 
+         # Set default config
+         if "timeout" not in self.config:
+             self.config["timeout"] = 300
+         if "deployment_region" not in self.config:
+             self.config["deployment_region"] = "us-east-1"
+         if "gpu_type" not in self.config:
+             self.config["gpu_type"] = "T4"
+ 
+     def get_api_key(self) -> str:
+         """Modal services don't need API keys for deployed apps"""
+         return "modal-deployed-service"  # Placeholder
+ 
+     def get_base_url(self) -> str:
+         """Get base URL for Modal services"""
+         return self.base_url
+ 
+     def validate_credentials(self) -> bool:
+         """
+         Validate Modal credentials
+         For deployed services, we assume they're accessible
+         """
+         try:
+             # Check if Modal is available
+             import modal
+             return True
+         except ImportError:
+             logger.warning("Modal package not available")
+             return False
+ 
+     def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
+         """Get Modal provider capabilities"""
+         return {
+             ModelType.VISION: [
+                 Capability.OBJECT_DETECTION,
+                 Capability.IMAGE_ANALYSIS,
+                 Capability.UI_DETECTION,
+                 Capability.OCR,
+                 Capability.DOCUMENT_ANALYSIS
+             ]
+         }
+ 
+     def get_models(self, model_type: ModelType) -> List[str]:
+         """Get available models for given type"""
+         if model_type == ModelType.VISION:
+             return [
+                 "omniparser-v2.0",
+                 "table-transformer-detection",
+                 "table-transformer-structure-v1.1",
+                 "paddleocr-3.0",
+                 "yolov8"
+             ]
+         return []
+ 
+     def is_reasoning_model(self, model_name: str) -> bool:
+         """Check if the model is optimized for reasoning tasks"""
+         # Vision models are not reasoning models
+         return False
+ 
+     def get_default_config(self) -> Dict[str, Any]:
+         """Get default configuration for Modal services"""
+         return {
+             "timeout": 300,  # 5 minutes
+             "max_retries": 3,
+             "deployment_region": "us-east-1",
+             "gpu_type": "T4"
+         }
+ 
+     def get_billing_info(self) -> Dict[str, Any]:
+         """Get billing information for Modal services"""
+         return {
+             "provider": "modal",
+             "billing_model": "compute_usage",
+             "cost_per_hour": {
+                 "T4": 0.60,
+                 "A100": 4.00
+             },
+             "note": "Costs depend on actual usage time, scales to zero when not in use"
+         }
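
For orientation, a minimal usage sketch of the new provider (hypothetical driver code, not part of this diff; it assumes only the methods shown above):

    from isa_model.inference.providers.modal_provider import ModalProvider
    from isa_model.inference.base import ModelType

    provider = ModalProvider()                    # no API key needed
    if provider.validate_credentials():           # True when `modal` is importable
        print(provider.get_models(ModelType.VISION))         # ["omniparser-v2.0", ...]
        print(provider.get_billing_info()["cost_per_hour"])  # {"T4": 0.60, "A100": 4.0}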
File: isa_model/inference/providers/yyds_provider.py (new file)
@@ -0,0 +1,108 @@
+ from isa_model.inference.providers.base_provider import BaseProvider
+ from isa_model.inference.base import ModelType, Capability
+ from typing import Dict, List, Any
+ import logging
+ import os
+ 
+ logger = logging.getLogger(__name__)
+ 
+ class YydsProvider(BaseProvider):
+     """Provider for YYDS API with proper API key management"""
+ 
+     def __init__(self, config=None):
+         """Initialize the YYDS Provider with centralized config management"""
+         super().__init__(config)
+         self.name = "yyds"
+ 
+         logger.info(f"Initialized YydsProvider with URL: {self.config.get('base_url', 'https://api.yyds.com/v1')}")
+ 
+         if not self.has_valid_credentials():
+             logger.warning("YYDS API key not found. Set YYDS_API_KEY environment variable or pass api_key in config.")
+ 
+     def _load_provider_env_vars(self):
+         """Load YYDS-specific environment variables"""
+         # Set defaults first
+         defaults = {
+             "base_url": "https://api.yyds.com/v1",
+             "timeout": 60,
+             "temperature": 0.7,
+             "top_p": 0.9,
+             "max_tokens": 1024
+         }
+ 
+         # Apply defaults only if not already set
+         for key, value in defaults.items():
+             if key not in self.config:
+                 self.config[key] = value
+ 
+         # Load from environment variables (override config if present)
+         env_mappings = {
+             "api_key": "YYDS_API_KEY",
+             "base_url": "YYDS_API_BASE",
+             "organization": "YYDS_ORGANIZATION"
+         }
+ 
+         for config_key, env_var in env_mappings.items():
+             env_value = os.getenv(env_var)
+             if env_value:
+                 self.config[config_key] = env_value
+ 
+     def _validate_config(self):
+         """Validate YYDS configuration"""
+         if not self.config.get("api_key"):
+             logger.debug("YYDS API key not set - some functionality may not work")
+ 
+     def get_model_pricing(self, model_name: str) -> Dict[str, float]:
+         """Get pricing information for a model - delegated to ModelManager"""
+         # Import here to avoid circular imports
+         from isa_model.core.model_manager import ModelManager
+         model_manager = ModelManager()
+         return model_manager.get_model_pricing("yyds", model_name)
+ 
+     def calculate_cost(self, model_name: str, input_tokens: int, output_tokens: int) -> float:
+         """Calculate cost for a request - delegated to ModelManager"""
+         # Import here to avoid circular imports
+         from isa_model.core.model_manager import ModelManager
+         model_manager = ModelManager()
+         return model_manager.calculate_cost("yyds", model_name, input_tokens, output_tokens)
+ 
+     def set_api_key(self, api_key: str):
+         """Set the API key after initialization"""
+         self.config["api_key"] = api_key
+         logger.info("YYDS API key updated")
+ 
+     def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
+         """Get provider capabilities by model type"""
+         return {
+             ModelType.LLM: [
+                 Capability.CHAT,
+                 Capability.COMPLETION
+             ]
+         }
+ 
+     def get_models(self, model_type: ModelType) -> List[str]:
+         """Get available models for given type"""
+         if model_type == ModelType.LLM:
+             return ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"]
+         else:
+             return []
+ 
+     def get_default_model(self, model_type: ModelType) -> str:
+         """Get default model for a given type"""
+         if model_type == ModelType.LLM:
+             return "claude-sonnet-4-20250514"
+         else:
+             return ""
+ 
+     def get_config(self) -> Dict[str, Any]:
+         """Get provider configuration"""
+         # Return a copy without sensitive information
+         config_copy = self.config.copy()
+         if "api_key" in config_copy:
+             config_copy["api_key"] = "***" if config_copy["api_key"] else ""
+         return config_copy
+ 
+     def is_reasoning_model(self, model_name: str) -> bool:
+         """Check if the model is optimized for reasoning tasks"""
+         reasoning_models = ["claude-sonnet-4", "claude-3-5-sonnet"]
+         return any(rm in model_name.lower() for rm in reasoning_models)
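
A similar sketch for the YYDS provider, assuming only the environment variables and methods defined above (the key value is a placeholder):

    import os
    os.environ["YYDS_API_KEY"] = "sk-placeholder"  # hypothetical key, for illustration

    from isa_model.inference.providers.yyds_provider import YydsProvider
    from isa_model.inference.base import ModelType

    provider = YydsProvider()
    print(provider.get_default_model(ModelType.LLM))  # "claude-sonnet-4-20250514"
    print(provider.get_config()["api_key"])           # "***" (masked copy)
    print(provider.is_reasoning_model("claude-3-5-sonnet-20241022"))  # True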
@@ -5,7 +5,8 @@ File: isa_model/inference/services/__init__.py
  This module contains service implementations for different AI model types.
  """
  
- from .base_service import BaseService, BaseLLMService, BaseEmbeddingService
+ from .base_service import BaseService, BaseEmbeddingService
+ from .llm.base_llm_service import BaseLLMService
  
  __all__ = [
      "BaseService",
File: isa_model/inference/services/base_service.py
@@ -52,44 +52,6 @@ class BaseService(ABC):
          yield
          return self
  
- class BaseLLMService(BaseService):
-     """Base class for LLM services"""
- 
-     @abstractmethod
-     async def ainvoke(self, prompt: Union[str, List[Dict[str, str]], Any]) -> T:
-         """Universal invocation method"""
-         pass
- 
-     @abstractmethod
-     async def achat(self, messages: List[Dict[str, str]]) -> T:
-         """Chat completion method"""
-         pass
- 
-     @abstractmethod
-     async def acompletion(self, prompt: str) -> T:
-         """Text completion method"""
-         pass
- 
-     @abstractmethod
-     async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[T]:
-         """Generate multiple completions"""
-         pass
- 
-     @abstractmethod
-     async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
-         """Stream chat responses"""
-         pass
- 
-     @abstractmethod
-     def get_token_usage(self) -> Any:
-         """Get total token usage statistics"""
-         pass
- 
-     @abstractmethod
-     def get_last_token_usage(self) -> Dict[str, int]:
-         """Get token usage from last request"""
-         pass
- 
  class BaseEmbeddingService(BaseService):
      """Base class for embedding services"""
  
File: isa_model/inference/services/llm/base_llm_service.py
@@ -51,6 +51,22 @@ class BaseLLMService(BaseService):
          """Execute a tool call via the adapter manager"""
          return await self.adapter_manager.execute_tool(tool_name, arguments, self._tool_mappings)
  
+     @abstractmethod
+     async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+         """
+         True streaming method that yields tokens one by one as they arrive
+ 
+         Args:
+             input_data: Can be:
+                 - str: Simple text prompt
+                 - list: Message history like [{"role": "user", "content": "hello"}]
+                 - Any: LangChain message objects or other formats
+ 
+         Yields:
+             Individual tokens as they arrive from the model
+         """
+         pass
+ 
      @abstractmethod
      async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
          """
@@ -67,6 +83,22 @@
          """
          pass
  
+     def stream(self, input_data: Union[str, List[Dict[str, str]], Any]):
+         """
+         Synchronous wrapper for astream - returns the async generator
+ 
+         Args:
+             input_data: Same as astream
+ 
+         Returns:
+             AsyncGenerator that yields tokens
+ 
+         Usage:
+             async for token in llm.stream("Hello"):
+                 print(token, end="", flush=True)
+         """
+         return self.astream(input_data)
+ 
      def invoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
          """
          Synchronous wrapper for ainvoke
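
Because stream() simply returns the async generator from astream(), the caller drives it with `async for`; a minimal consumer sketch (assumes `llm` is any concrete BaseLLMService from this package):

    import asyncio

    async def demo(llm):
        # stream() and astream() are interchangeable in this loop
        async for token in llm.stream("Hello"):
            print(token, end="", flush=True)
        print()

    # asyncio.run(demo(llm))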
File: isa_model/inference/services/llm/llm_adapter.py
@@ -120,7 +120,12 @@ class LangChainMessageAdapter:
              msg_dict["role"] = "tool"
              if hasattr(msg, 'tool_call_id'):
                  msg_dict["tool_call_id"] = msg.tool_call_id
+         elif msg.type == "function":  # Legacy function message
+             msg_dict["role"] = "function"
+             if hasattr(msg, 'name'):
+                 msg_dict["name"] = msg.name
          else:
+             # Unknown message type, default to user
              msg_dict["role"] = "user"
  
          converted_messages.append(msg_dict)
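
The new branch covers LangChain's legacy FunctionMessage type; a sketch of the conversion it performs (assumes langchain_core is installed):

    from langchain_core.messages import FunctionMessage

    msg = FunctionMessage(name="get_weather", content='{"temp": 21}')
    # msg.type == "function", so the adapter now produces:
    #   {"role": "function", "name": "get_weather", "content": '{"temp": 21}'}
    # instead of silently falling through to {"role": "user", ...}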
@@ -245,6 +250,69 @@ class LangChainToolAdapter:
              return f"Error executing LangChain tool {tool.name}: {str(e)}"
  
  
+ # ============= OpenAI-format tool adapter =============
+ 
+ class DictToolAdapter:
+     """Adapter for OpenAI-format tool dicts"""
+ 
+     def __init__(self):
+         self.adapter_name = "dict_tool"
+         self.priority = 9  # Higher priority than Python functions
+ 
+     def can_handle(self, tool: Any) -> bool:
+         """Check whether this is an OpenAI-format tool dict"""
+         return (isinstance(tool, dict) and
+                 tool.get("type") == "function" and
+                 "function" in tool and
+                 isinstance(tool["function"], dict) and
+                 "name" in tool["function"])
+ 
+     def to_openai_schema(self, tool: Any) -> Dict[str, Any]:
+         """Tool is already in OpenAI format; return it unchanged"""
+         return tool
+ 
+     async def execute_tool(self, tool: Any, arguments: Dict[str, Any]) -> Any:
+         """Execute an OpenAI-format tool (usually requires an external executor)"""
+         # OpenAI-format tool dicts cannot be executed directly;
+         # return an indication and let the caller handle it
+         tool_name = tool["function"]["name"]
+         return f"Error: Cannot execute dict tool {tool_name} directly. Requires external executor."
+ 
+ 
+ # ============= MCP tool adapter =============
+ 
+ class MCPToolAdapter:
+     """MCP tool adapter - handles the MCP protocol tool format"""
+ 
+     def __init__(self):
+         self.adapter_name = "mcp_tool"
+         self.priority = 7  # High priority, between LangChain and Dict
+ 
+     def can_handle(self, tool: Any) -> bool:
+         """Check whether this is an MCP tool format"""
+         return (isinstance(tool, dict) and
+                 "name" in tool and
+                 "description" in tool and
+                 "inputSchema" in tool and
+                 isinstance(tool["inputSchema"], dict))
+ 
+     def to_openai_schema(self, tool: Any) -> Dict[str, Any]:
+         """Convert an MCP tool to an OpenAI schema"""
+         return {
+             "type": "function",
+             "function": {
+                 "name": tool["name"],
+                 "description": tool["description"],
+                 "parameters": tool.get("inputSchema", {"type": "object", "properties": {}})
+             }
+         }
+ 
+     async def execute_tool(self, tool: Any, arguments: Dict[str, Any]) -> Any:
+         """MCP tool execution is handled externally; return an informative message"""
+         tool_name = tool["name"]
+         return f"MCP tool {tool_name} execution should be handled externally by MCP client"
+ 
+ 
  # ============= Python function adapter =============
  
  class PythonFunctionAdapter:
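
For reference, an MCP-style tool definition and the OpenAI schema the new adapter produces (the tool itself is illustrative, not from this diff):

    from isa_model.inference.services.llm.llm_adapter import MCPToolAdapter  # path per this diff

    mcp_tool = {
        "name": "search_docs",
        "description": "Search the documentation index",
        "inputSchema": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    }

    adapter = MCPToolAdapter()
    assert adapter.can_handle(mcp_tool)
    schema = adapter.to_openai_schema(mcp_tool)
    # {"type": "function",
    #  "function": {"name": "search_docs",
    #               "description": "Search the documentation index",
    #               "parameters": {... the inputSchema ...}}}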
@@ -391,10 +459,12 @@ class AdapterManager:
              StandardMessageAdapter()   # Fallback adapter
          ]
  
-         # Tool adapters
+         # Tool adapters (ordered by priority)
          self.tool_adapters = [
-             LangChainToolAdapter(),
-             PythonFunctionAdapter()
+             DictToolAdapter(),         # Highest priority - OpenAI-format tools
+             LangChainToolAdapter(),    # Medium priority - LangChain tools
+             MCPToolAdapter(),          # High priority - MCP tools
+             PythonFunctionAdapter()    # Lowest priority - Python functions
          ]
  
      def register_custom_adapter(self, adapter, adapter_type: str):
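
The dispatch loop itself is outside this hunk, but with a priority-ordered list the usual pattern is first-match-wins, which is why DictToolAdapter must precede PythonFunctionAdapter (a sketch under that assumption, not the package's actual code):

    def find_tool_adapter(tool, adapters):
        # First adapter whose can_handle() accepts the tool wins,
        # so list order encodes priority.
        for adapter in adapters:
            if adapter.can_handle(tool):
                return adapter
        return None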
File: isa_model/inference/services/llm/ollama_llm_service.py
@@ -86,9 +86,15 @@ class OllamaLLMService(BaseLLMService):
          if tool_schemas:
              payload["tools"] = tool_schemas
  
-         # Handle streaming
+         # Handle streaming vs non-streaming
          if self.streaming:
-             return self._stream_response(payload)
+             # TRUE STREAMING MODE - collect all chunks from the stream
+             content_chunks = []
+             async for token in self.astream(input_data):
+                 content_chunks.append(token)
+             content = "".join(content_chunks)
+ 
+             return self._format_response(content, input_data)
  
          # Regular request
          response = await self.client.post("/api/chat", json=payload)
@@ -190,6 +196,32 @@
          # Get final response from the model
          return await self.ainvoke(messages)
  
+     def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+         """Track usage for streaming requests (estimated)"""
+         # Create a mock usage object for tracking
+         class MockUsage:
+             def __init__(self):
+                 self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                 self.completion_tokens = len(content) // 4  # Rough estimate
+                 self.total_tokens = self.prompt_tokens + self.completion_tokens
+ 
+         usage = MockUsage()
+         self._update_token_usage_from_mock(usage)
+ 
+     def _update_token_usage_from_mock(self, usage):
+         """Update token usage statistics from mock usage object"""
+         self.last_token_usage = {
+             "prompt_tokens": usage.prompt_tokens,
+             "completion_tokens": usage.completion_tokens,
+             "total_tokens": usage.total_tokens
+         }
+ 
+         # Update total usage
+         self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+         self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+         self.total_token_usage["requests_count"] += 1
+ 
      def _update_token_usage(self, result: Dict[str, Any]):
          """Update token usage statistics"""
          self.last_token_usage = {
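
Exact token counts are unavailable on this streaming path, so usage is estimated at roughly four characters per token; worked through on a small example (illustrative values):

    messages = [{"role": "user", "content": "Tell me a joke"}]
    content = "Why did the tokenizer stop? It ran out of context."  # 50 chars

    prompt_tokens = len(str(messages)) // 4           # 47 chars // 4 == 11
    completion_tokens = len(content) // 4             # 50 chars // 4 == 12
    total_tokens = prompt_tokens + completion_tokens  # 23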
@@ -230,4 +262,73 @@
              if not self.client.is_closed:
                  await self.client.aclose()
          except Exception as e:
-             logger.warning(f"Error closing Ollama client: {e}")
+             logger.warning(f"Error closing Ollama client: {e}")
+ 
+     async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+         """
+         True streaming method that yields tokens one by one as they arrive
+ 
+         Args:
+             input_data: Can be:
+                 - str: Simple text prompt
+                 - list: Message history like [{"role": "user", "content": "hello"}]
+                 - Any: LangChain message objects or other formats
+ 
+         Yields:
+             Individual tokens as they arrive from the model
+         """
+         try:
+             # Ensure client is available
+             self._ensure_client()
+ 
+             # Use adapter manager to prepare messages
+             messages = self._prepare_messages(input_data)
+ 
+             # Prepare request parameters for streaming
+             payload = {
+                 "model": self.model_name,
+                 "messages": messages,
+                 "stream": True,  # Force streaming for astream
+                 "options": {
+                     "temperature": self.config.get("temperature", 0.7),
+                     "top_p": self.config.get("top_p", 0.9),
+                     "num_predict": self.config.get("max_tokens", 2048)
+                 }
+             }
+ 
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 payload["tools"] = tool_schemas
+ 
+             # Stream tokens one by one
+             content_chunks = []
+             try:
+                 async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                     response.raise_for_status()
+                     async for line in response.aiter_lines():
+                         if line.strip():
+                             try:
+                                 chunk = json.loads(line)
+                                 if "message" in chunk and "content" in chunk["message"]:
+                                     content = chunk["message"]["content"]
+                                     if content:
+                                         content_chunks.append(content)
+                                         yield content
+                             except json.JSONDecodeError:
+                                 continue
+ 
+                 # Track usage after streaming is complete (estimated)
+                 full_content = "".join(content_chunks)
+                 self._track_streaming_usage(messages, full_content)
+ 
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+ 
+         except httpx.RequestError as e:
+             logger.error(f"HTTP request error in astream: {e}")
+             raise
+         except Exception as e:
+             logger.error(f"Error in astream: {e}")
+             raise
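
End-to-end, the new Ollama streaming path can be exercised like this (a hypothetical sketch: constructor arguments and a running local Ollama daemon are assumed, and neither is defined in this hunk):

    import asyncio
    from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService

    async def main(service: OllamaLLMService):
        async for token in service.astream("Explain unified diffs in one line"):
            print(token, end="", flush=True)
        # Once the generator is exhausted, estimated usage has been recorded:
        print(service.last_token_usage)

    # asyncio.run(main(service))  # service construction elided; see ai_factory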
File: isa_model/inference/services/llm/openai_llm_service.py
@@ -67,6 +67,57 @@ class OpenAILLMService(BaseLLMService):
  
          return bound_service
  
+     async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+         """
+         True streaming method - yields tokens one by one as they arrive
+ 
+         Args:
+             input_data: Same as ainvoke
+ 
+         Yields:
+             Individual tokens as they arrive from the API
+         """
+         try:
+             # Use adapter manager to prepare messages
+             messages = self._prepare_messages(input_data)
+ 
+             # Prepare request kwargs
+             kwargs = {
+                 "model": self.model_name,
+                 "messages": messages,
+                 "temperature": self.config.get("temperature", 0.7),
+                 "max_tokens": self.config.get("max_tokens", 1024),
+                 "stream": True
+             }
+ 
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 kwargs["tools"] = tool_schemas
+                 kwargs["tool_choice"] = "auto"
+ 
+             # Stream tokens one by one
+             content_chunks = []
+             try:
+                 stream = await self.client.chat.completions.create(**kwargs)
+                 async for chunk in stream:
+                     content = chunk.choices[0].delta.content
+                     if content:
+                         content_chunks.append(content)
+                         yield content
+ 
+                 # Track usage after streaming is complete
+                 full_content = "".join(content_chunks)
+                 self._track_streaming_usage(messages, full_content)
+ 
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+ 
+         except Exception as e:
+             logger.error(f"Error in astream: {e}")
+             raise
+ 
      async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
          """Unified invoke method for all input types"""
          try:
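
The OpenAI service now exposes the same contract, so callers can swap backends without changing the consuming loop (sketch; client construction is outside this hunk):

    async def collect(service) -> str:
        # Streaming and joining yields the same text that ainvoke()
        # returns in streaming mode, since ainvoke() now delegates to astream().
        chunks = []
        async for token in service.astream([{"role": "user", "content": "hi"}]):
            chunks.append(token)
        return "".join(chunks)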
@@ -89,23 +140,12 @@
  
          # Handle streaming vs non-streaming
          if self.streaming:
-             # Streaming mode - collect all chunks
+             # TRUE STREAMING MODE - collect all chunks from the stream
              content_chunks = []
-             async for chunk in await self._stream_response(kwargs):
-                 content_chunks.append(chunk)
+             async for token in self.astream(input_data):
+                 content_chunks.append(token)
              content = "".join(content_chunks)
  
-             # Create a mock usage object for tracking
-             class MockUsage:
-                 def __init__(self):
-                     self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
-                     self.completion_tokens = len(content) // 4  # Rough estimate
-                     self.total_tokens = self.prompt_tokens + self.completion_tokens
- 
-             usage = MockUsage()
-             self._update_token_usage(usage)
-             self._track_billing(usage)
- 
              return self._format_response(content, input_data)
          else:
              # Non-streaming mode
@@ -129,9 +169,21 @@
              logger.error(f"Error in ainvoke: {e}")
              raise
  
+     def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+         """Track usage for streaming requests (estimated)"""
+         # Create a mock usage object for tracking
+         class MockUsage:
+             def __init__(self):
+                 self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                 self.completion_tokens = len(content) // 4  # Rough estimate
+                 self.total_tokens = self.prompt_tokens + self.completion_tokens
+ 
+         usage = MockUsage()
+         self._update_token_usage(usage)
+         self._track_billing(usage)
  
      async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
-         """Handle streaming responses"""
+         """Handle streaming responses - DEPRECATED: Use astream() instead"""
          kwargs["stream"] = True
  
          async def stream_generator():