autobyteus 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (49)
  1. autobyteus/agent/bootstrap_steps/__init__.py +2 -0
  2. autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +2 -0
  3. autobyteus/agent/bootstrap_steps/mcp_server_prewarming_step.py +71 -0
  4. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +41 -12
  5. autobyteus/agent/runtime/agent_runtime.py +1 -4
  6. autobyteus/agent/runtime/agent_worker.py +56 -23
  7. autobyteus/agent/shutdown_steps/__init__.py +17 -0
  8. autobyteus/agent/shutdown_steps/agent_shutdown_orchestrator.py +63 -0
  9. autobyteus/agent/shutdown_steps/base_shutdown_step.py +33 -0
  10. autobyteus/agent/shutdown_steps/llm_instance_cleanup_step.py +45 -0
  11. autobyteus/agent/shutdown_steps/mcp_server_cleanup_step.py +32 -0
  12. autobyteus/llm/api/deepseek_llm.py +10 -172
  13. autobyteus/llm/api/grok_llm.py +10 -171
  14. autobyteus/llm/api/kimi_llm.py +24 -0
  15. autobyteus/llm/api/openai_compatible_llm.py +193 -0
  16. autobyteus/llm/api/openai_llm.py +11 -139
  17. autobyteus/llm/llm_factory.py +62 -0
  18. autobyteus/llm/providers.py +1 -0
  19. autobyteus/llm/token_counter/kimi_token_counter.py +24 -0
  20. autobyteus/llm/token_counter/token_counter_factory.py +3 -0
  21. autobyteus/llm/utils/messages.py +3 -3
  22. autobyteus/tools/base_tool.py +2 -0
  23. autobyteus/tools/mcp/__init__.py +10 -7
  24. autobyteus/tools/mcp/call_handlers/__init__.py +0 -2
  25. autobyteus/tools/mcp/config_service.py +1 -6
  26. autobyteus/tools/mcp/factory.py +12 -26
  27. autobyteus/tools/mcp/registrar.py +57 -178
  28. autobyteus/tools/mcp/server/__init__.py +16 -0
  29. autobyteus/tools/mcp/server/base_managed_mcp_server.py +139 -0
  30. autobyteus/tools/mcp/server/http_managed_mcp_server.py +29 -0
  31. autobyteus/tools/mcp/server/proxy.py +36 -0
  32. autobyteus/tools/mcp/server/stdio_managed_mcp_server.py +33 -0
  33. autobyteus/tools/mcp/server_instance_manager.py +93 -0
  34. autobyteus/tools/mcp/tool.py +28 -46
  35. autobyteus/tools/mcp/tool_registrar.py +177 -0
  36. autobyteus/tools/mcp/types.py +10 -21
  37. autobyteus/tools/registry/tool_definition.py +11 -2
  38. autobyteus/tools/registry/tool_registry.py +27 -28
  39. autobyteus/tools/usage/parsers/_json_extractor.py +99 -0
  40. autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +46 -77
  41. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +87 -97
  42. autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +38 -46
  43. autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +104 -154
  44. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/METADATA +4 -2
  45. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/RECORD +48 -32
  46. autobyteus/tools/mcp/call_handlers/sse_handler.py +0 -22
  47. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/WHEEL +0 -0
  48. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/licenses/LICENSE +0 -0
  49. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/top_level.txt +0 -0
autobyteus/llm/api/deepseek_llm.py
@@ -1,188 +1,26 @@
 import logging
-import os
-from typing import Optional, List, AsyncGenerator
-from openai import OpenAI
-from openai.types.completion_usage import CompletionUsage
-from openai.types.chat import ChatCompletionChunk
-from autobyteus.llm.base_llm import BaseLLM
+from typing import Optional
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import MessageRole
-from autobyteus.llm.utils.image_payload_formatter import process_image
-from autobyteus.llm.utils.token_usage import TokenUsage
-from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
 
 logger = logging.getLogger(__name__)
 
-class DeepSeekLLM(BaseLLM):
+class DeepSeekLLM(OpenAICompatibleLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
-        if not deepseek_api_key:
-            logger.error("DEEPSEEK_API_KEY environment variable is not set.")
-            raise ValueError("DEEPSEEK_API_KEY environment variable is not set.")
-
-        self.client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
-        logger.info("DeepSeek API key and base URL set successfully")
-
         # Provide defaults if not specified
         if model is None:
-            model = LLMModel.deepseek_chat
+            model = LLMModel['deepseek-chat']
         if llm_config is None:
             llm_config = LLMConfig()
 
-        super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
-
-    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
-        if not usage_data:
-            return None
-
-        return TokenUsage(
-            prompt_tokens=usage_data.prompt_tokens,
-            completion_tokens=usage_data.completion_tokens,
-            total_tokens=usage_data.total_tokens
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="DEEPSEEK_API_KEY",
+            base_url="https://api.deepseek.com"
         )
+        logger.info(f"DeepSeekLLM initialized with model: {self.model}")
 
-    async def _send_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> CompleteResponse:
-        """
-        Sends a non-streaming request to the DeepSeek API.
-        Supports optional reasoning content if provided in the response.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
-        try:
-            logger.info("Sending request to DeepSeek API")
-            response = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-            )
-            full_message = response.choices.message
-
-            # Extract reasoning_content if present
-            reasoning = None
-            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
-                reasoning = full_message.reasoning_content
-            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
-                reasoning = full_message["reasoning_content"]
-
-            # Extract main content
-            main_content = ""
-            if hasattr(full_message, "content") and full_message.content:
-                main_content = full_message.content
-            elif "content" in full_message and full_message["content"]:
-                main_content = full_message["content"]
-
-            self.add_assistant_message(main_content, reasoning_content=reasoning)
-
-            token_usage = self._create_token_usage(response.usage)
-            logger.info("Received response from DeepSeek API with usage data")
-
-            return CompleteResponse(
-                content=main_content,
-                reasoning=reasoning,
-                usage=token_usage
-            )
-        except Exception as e:
-            logger.error(f"Error in DeepSeek API request: {str(e)}")
-            raise ValueError(f"Error in DeepSeek API request: {str(e)}")
-
-    async def _stream_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> AsyncGenerator[ChunkResponse, None]:
-        """
-        Streams the response from the DeepSeek API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
-        accumulated_reasoning = ""
-        accumulated_content = ""
-
-        try:
-            logger.info("Starting streaming request to DeepSeek API")
-            stream = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-                stream=True,
-                stream_options={"include_usage": True}
-            )
-
-            for chunk in stream:
-                chunk: ChatCompletionChunk
-
-                # Process reasoning tokens
-                reasoning_chunk = getattr(chunk.choices.delta, "reasoning_content", None)
-                if reasoning_chunk:
-                    accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=reasoning_chunk
-                    )
-
-                # Process main content tokens
-                main_token = chunk.choices.delta.content
-                if main_token:
-                    accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
-
-                # Yield token usage if available in the final chunk
-                if hasattr(chunk, "usage") and chunk.usage is not None:
-                    token_usage = self._create_token_usage(chunk.usage)
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=None,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-
-            # After streaming, add the fully accumulated assistant message to history
-            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
-            logger.info("Completed streaming response from DeepSeek API")
-
-        except Exception as e:
-            logger.error(f"Error in DeepSeek API streaming: {str(e)}")
-            raise ValueError(f"Error in DeepSeek API streaming: {str(e)}")
-
     async def cleanup(self):
         await super().cleanup()
autobyteus/llm/api/grok_llm.py
@@ -1,187 +1,26 @@
 import logging
-import os
-from typing import Optional, List, AsyncGenerator
-from openai import OpenAI
-from openai.types.completion_usage import CompletionUsage
-from openai.types.chat import ChatCompletionChunk
-from autobyteus.llm.base_llm import BaseLLM
+from typing import Optional
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import MessageRole
-from autobyteus.llm.utils.image_payload_formatter import process_image
-from autobyteus.llm.utils.token_usage import TokenUsage
-from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
 
 logger = logging.getLogger(__name__)
 
-class GrokLLM(BaseLLM):
+class GrokLLM(OpenAICompatibleLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        grok_api_key = os.getenv("GROK_API_KEY")
-        if not grok_api_key:
-            logger.error("GROK_API_KEY environment variable is not set.")
-            raise ValueError("GROK_API_KEY environment variable is not set.")
-
-        self.client = OpenAI(api_key=grok_api_key, base_url="https://api.x.ai/v1")
-        logger.info("Grok API key and base URL set successfully")
-
        # Provide defaults if not specified
         if model is None:
-            model = LLMModel.grok_2_1212
+            model = LLMModel['grok-2-1212']
         if llm_config is None:
             llm_config = LLMConfig()
 
-        super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
-
-    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
-        if not usage_data:
-            return None
-
-        return TokenUsage(
-            prompt_tokens=usage_data.prompt_tokens,
-            completion_tokens=usage_data.completion_tokens,
-            total_tokens=usage_data.total_tokens
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="GROK_API_KEY",
+            base_url="https://api.x.ai/v1"
        )
+        logger.info(f"GrokLLM initialized with model: {self.model}")
 
-    async def _send_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> CompleteResponse:
-        """
-        Sends a non-streaming request to the Grok API.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
-        try:
-            logger.info("Sending request to Grok API")
-            response = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-            )
-            full_message = response.choices.message
-
-            # Extract reasoning_content if present
-            reasoning = None
-            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
-                reasoning = full_message.reasoning_content
-            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
-                reasoning = full_message["reasoning_content"]
-
-            # Extract main content
-            main_content = ""
-            if hasattr(full_message, "content") and full_message.content:
-                main_content = full_message.content
-            elif "content" in full_message and full_message["content"]:
-                main_content = full_message["content"]
-
-            self.add_assistant_message(main_content, reasoning_content=reasoning)
-
-            token_usage = self._create_token_usage(response.usage)
-            logger.info("Received response from Grok API with usage data")
-
-            return CompleteResponse(
-                content=main_content,
-                reasoning=reasoning,
-                usage=token_usage
-            )
-        except Exception as e:
-            logger.error(f"Error in Grok API request: {str(e)}")
-            raise ValueError(f"Error in Grok API request: {str(e)}")
-
-    async def _stream_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> AsyncGenerator[ChunkResponse, None]:
-        """
-        Streams the response from the Grok API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
-        accumulated_reasoning = ""
-        accumulated_content = ""
-
-        try:
-            logger.info("Starting streaming request to Grok API")
-            stream = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-                stream=True,
-                stream_options={"include_usage": True}
-            )
-
-            for chunk in stream:
-                chunk: ChatCompletionChunk
-
-                # Process reasoning tokens
-                reasoning_chunk = getattr(chunk.choices.delta, "reasoning_content", None)
-                if reasoning_chunk:
-                    accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=reasoning_chunk
-                    )
-
-                # Process main content tokens
-                main_token = chunk.choices.delta.content
-                if main_token:
-                    accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
-
-                # Yield token usage if available in the final chunk
-                if hasattr(chunk, "usage") and chunk.usage is not None:
-                    token_usage = self._create_token_usage(chunk.usage)
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=None,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-
-            # After streaming, add the fully accumulated assistant message to history
-            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
-            logger.info("Completed streaming response from Grok API")
-
-        except Exception as e:
-            logger.error(f"Error in Grok API streaming: {str(e)}")
-            raise ValueError(f"Error in Grok API streaming: {str(e)}")
-
     async def cleanup(self):
         await super().cleanup()
autobyteus/llm/api/kimi_llm.py
@@ -0,0 +1,24 @@
+import logging
+from typing import Optional
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
+
+logger = logging.getLogger(__name__)
+
+class KimiLLM(OpenAICompatibleLLM):
+    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
+        # Provide defaults if not specified
+        if model is None:
+            # Setting a default Kimi model from the factory ones
+            model = LLMModel['kimi-latest']
+        if llm_config is None:
+            llm_config = LLMConfig()
+
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="KIMI_API_KEY",
+            base_url="https://api.moonshot.cn/v1"
+        )
+        logger.info(f"KimiLLM initialized with model: {self.model}")
autobyteus/llm/api/openai_compatible_llm.py
@@ -0,0 +1,193 @@
+import logging
+import os
+from abc import ABC
+from typing import Optional, List, AsyncGenerator
+from openai import OpenAI
+from openai.types.completion_usage import CompletionUsage
+from openai.types.chat import ChatCompletionChunk
+
+from autobyteus.llm.base_llm import BaseLLM
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.utils.image_payload_formatter import process_image
+from autobyteus.llm.utils.token_usage import TokenUsage
+from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+
+logger = logging.getLogger(__name__)
+
+class OpenAICompatibleLLM(BaseLLM, ABC):
+    def __init__(
+        self,
+        model: LLMModel,
+        llm_config: LLMConfig,
+        api_key_env_var: str,
+        base_url: str
+    ):
+        api_key = os.getenv(api_key_env_var)
+        if not api_key:
+            logger.error(f"{api_key_env_var} environment variable is not set.")
+            raise ValueError(f"{api_key_env_var} environment variable is not set.")
+
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
+        logger.info(f"Initialized OpenAI compatible client with base_url: {base_url}")
+
+        super().__init__(model=model, llm_config=llm_config)
+        self.max_tokens = 8000  # A default, can be overridden by subclass or config
+
+    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
+        """Convert usage data to TokenUsage format."""
+        if not usage_data:
+            return None
+
+        return TokenUsage(
+            prompt_tokens=usage_data.prompt_tokens,
+            completion_tokens=usage_data.completion_tokens,
+            total_tokens=usage_data.total_tokens
+        )
+
+    async def _send_user_message_to_llm(
+        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+    ) -> CompleteResponse:
+        """
+        Sends a non-streaming request to an OpenAI-compatible API.
+        Supports optional reasoning content if provided in the response.
+        """
+        content = []
+
+        if user_message:
+            content.append({"type": "text", "text": user_message})
+
+        if image_urls:
+            for image_url in image_urls:
+                try:
+                    image_content = process_image(image_url)
+                    content.append(image_content)
+                    logger.info(f"Processed image: {image_url}")
+                except ValueError as e:
+                    logger.error(f"Error processing image {image_url}: {str(e)}")
+                    continue
+
+        self.add_user_message(content)
+        logger.debug(f"Prepared message content: {content}")
+
+        try:
+            logger.info(f"Sending request to {self.model.provider.value} API")
+            response = self.client.chat.completions.create(
+                model=self.model.value,
+                messages=[msg.to_dict() for msg in self.messages],
+                max_tokens=self.max_tokens,
+            )
+            full_message = response.choices[0].message
+
+            # Extract reasoning_content if present
+            reasoning = None
+            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
+                reasoning = full_message.reasoning_content
+            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
+                reasoning = full_message["reasoning_content"]
+
+            # Extract main content
+            main_content = ""
+            if hasattr(full_message, "content") and full_message.content:
+                main_content = full_message.content
+            elif "content" in full_message and full_message["content"]:
+                main_content = full_message["content"]
+
+            self.add_assistant_message(main_content, reasoning_content=reasoning)
+
+            token_usage = self._create_token_usage(response.usage)
+            logger.info(f"Received response from {self.model.provider.value} API with usage data")
+
+            return CompleteResponse(
+                content=main_content,
+                reasoning=reasoning,
+                usage=token_usage
+            )
+        except Exception as e:
+            logger.error(f"Error in {self.model.provider.value} API request: {str(e)}")
+            raise ValueError(f"Error in {self.model.provider.value} API request: {str(e)}")
+
+    async def _stream_user_message_to_llm(
+        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+    ) -> AsyncGenerator[ChunkResponse, None]:
+        """
+        Streams the response from an OpenAI-compatible API.
+        Yields reasoning and content in separate chunks.
+        """
+        content = []
+
+        if user_message:
+            content.append({"type": "text", "text": user_message})
+
+        if image_urls:
+            for image_url in image_urls:
+                try:
+                    image_content = process_image(image_url)
+                    content.append(image_content)
+                    logger.info(f"Processed image for streaming: {image_url}")
+                except ValueError as e:
+                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
+                    continue
+
+        self.add_user_message(content)
+        logger.debug(f"Prepared streaming message content: {content}")
+
+        # Initialize variables to track reasoning and main content
+        accumulated_reasoning = ""
+        accumulated_content = ""
+
+        try:
+            logger.info(f"Starting streaming request to {self.model.provider.value} API")
+            stream = self.client.chat.completions.create(
+                model=self.model.value,
+                messages=[msg.to_dict() for msg in self.messages],
+                max_tokens=self.max_tokens,
+                stream=True,
+                stream_options={"include_usage": True}
+            )
+
+            for chunk in stream:
+                chunk: ChatCompletionChunk
+                if not chunk.choices:
+                    continue
+
+                delta = chunk.choices[0].delta
+
+                # Process reasoning tokens (if supported by model)
+                reasoning_chunk = getattr(delta, "reasoning_content", None)
+                if reasoning_chunk:
+                    accumulated_reasoning += reasoning_chunk
+                    yield ChunkResponse(
+                        content="",
+                        reasoning=reasoning_chunk
+                    )
+
+                # Process main content tokens
+                main_token = delta.content
+                if main_token:
+                    accumulated_content += main_token
+                    yield ChunkResponse(
+                        content=main_token,
+                        reasoning=None
+                    )
+
+                # Yield token usage if available in the final chunk
+                if hasattr(chunk, "usage") and chunk.usage is not None:
+                    token_usage = self._create_token_usage(chunk.usage)
+                    yield ChunkResponse(
+                        content="",
+                        reasoning=None,
+                        is_complete=True,
+                        usage=token_usage
+                    )
+
+            # After streaming, add the fully accumulated assistant message to history
+            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
+            logger.info(f"Completed streaming response from {self.model.provider.value} API")
+
+        except Exception as e:
+            logger.error(f"Error in {self.model.provider.value} API streaming: {str(e)}")
+            raise ValueError(f"Error in {self.model.provider.value} API streaming: {str(e)}")
+
+    async def cleanup(self):
+        await super().cleanup()
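
The hunks above show the core refactor in this release: the duplicated request, streaming, and token-usage logic from DeepSeekLLM and GrokLLM now lives in the shared OpenAICompatibleLLM base class, and each provider class only supplies its default model, API-key environment variable, and base URL. Below is a minimal sketch of what a new provider wrapper could look like under this pattern. It is illustrative only: the provider name ExampleLLM, the EXAMPLE_API_KEY variable, the base URL, and the 'example-chat' model key are hypothetical placeholders and are not defined by the package.

# Hypothetical sketch of a new OpenAI-compatible provider wrapper.
# ExampleLLM, EXAMPLE_API_KEY, the base URL, and 'example-chat' are placeholders,
# not names shipped in autobyteus 1.1.3.
import logging
from autobyteus.llm.models import LLMModel
from autobyteus.llm.utils.llm_config import LLMConfig
from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM

logger = logging.getLogger(__name__)

class ExampleLLM(OpenAICompatibleLLM):
    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
        # Provide defaults if not specified, mirroring the DeepSeek/Grok/Kimi wrappers
        if model is None:
            model = LLMModel['example-chat']  # assumes such a model key is registered
        if llm_config is None:
            llm_config = LLMConfig()

        # The base class reads the API key from the named environment variable,
        # builds the OpenAI client against base_url, and supplies the shared
        # send/stream/token-usage behaviour shown in the diff above.
        super().__init__(
            model=model,
            llm_config=llm_config,
            api_key_env_var="EXAMPLE_API_KEY",
            base_url="https://api.example.com/v1",
        )
        logger.info(f"ExampleLLM initialized with model: {self.model}")

The DeepSeekLLM, GrokLLM, and new KimiLLM classes in this release follow exactly this shape, differing only in their default model, environment variable, and base URL.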