autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- autobyteus/agent/context/agent_config.py +6 -1
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
- autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
- autobyteus/agent/message/__init__.py +7 -5
- autobyteus/agent/message/agent_input_user_message.py +6 -16
- autobyteus/agent/message/context_file.py +24 -24
- autobyteus/agent/message/context_file_type.py +29 -8
- autobyteus/agent/message/multimodal_message_builder.py +47 -0
- autobyteus/agent/streaming/stream_event_payloads.py +23 -4
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
- autobyteus/agent/tool_invocation.py +2 -1
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
- autobyteus/agent_team/context/agent_team_config.py +1 -0
- autobyteus/llm/api/autobyteus_llm.py +33 -33
- autobyteus/llm/api/bedrock_llm.py +13 -5
- autobyteus/llm/api/claude_llm.py +13 -27
- autobyteus/llm/api/gemini_llm.py +108 -42
- autobyteus/llm/api/groq_llm.py +4 -3
- autobyteus/llm/api/mistral_llm.py +97 -51
- autobyteus/llm/api/nvidia_llm.py +6 -5
- autobyteus/llm/api/ollama_llm.py +37 -12
- autobyteus/llm/api/openai_compatible_llm.py +91 -91
- autobyteus/llm/autobyteus_provider.py +1 -1
- autobyteus/llm/base_llm.py +42 -139
- autobyteus/llm/extensions/base_extension.py +6 -6
- autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
- autobyteus/llm/llm_factory.py +106 -4
- autobyteus/llm/token_counter/token_counter_factory.py +1 -1
- autobyteus/llm/user_message.py +43 -35
- autobyteus/llm/utils/llm_config.py +34 -18
- autobyteus/llm/utils/media_payload_formatter.py +99 -0
- autobyteus/llm/utils/messages.py +32 -25
- autobyteus/llm/utils/response_types.py +9 -3
- autobyteus/llm/utils/token_usage.py +6 -5
- autobyteus/multimedia/__init__.py +31 -0
- autobyteus/multimedia/audio/__init__.py +11 -0
- autobyteus/multimedia/audio/api/__init__.py +4 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
- autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
- autobyteus/multimedia/audio/audio_client_factory.py +120 -0
- autobyteus/multimedia/audio/audio_model.py +96 -0
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
- autobyteus/multimedia/audio/base_audio_client.py +40 -0
- autobyteus/multimedia/image/__init__.py +11 -0
- autobyteus/multimedia/image/api/__init__.py +9 -0
- autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
- autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
- autobyteus/multimedia/image/api/openai_image_client.py +142 -0
- autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
- autobyteus/multimedia/image/base_image_client.py +67 -0
- autobyteus/multimedia/image/image_client_factory.py +118 -0
- autobyteus/multimedia/image/image_model.py +96 -0
- autobyteus/multimedia/providers.py +5 -0
- autobyteus/multimedia/runtimes.py +8 -0
- autobyteus/multimedia/utils/__init__.py +10 -0
- autobyteus/multimedia/utils/api_utils.py +19 -0
- autobyteus/multimedia/utils/multimedia_config.py +29 -0
- autobyteus/multimedia/utils/response_types.py +13 -0
- autobyteus/tools/__init__.py +3 -0
- autobyteus/tools/multimedia/__init__.py +8 -0
- autobyteus/tools/multimedia/audio_tools.py +116 -0
- autobyteus/tools/multimedia/image_tools.py +186 -0
- autobyteus/tools/tool_category.py +1 -0
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
- autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
- autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
- examples/run_browser_agent.py +1 -1
- autobyteus/llm/utils/image_payload_formatter.py +0 -89
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
autobyteus/llm/api/mistral_llm.py
CHANGED

@@ -1,45 +1,91 @@
-from typing import Dict, Optional, List, AsyncGenerator
+from typing import Dict, Optional, List, Any, AsyncGenerator, Union
 import os
 import logging
+import httpx
+import asyncio
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.base_llm import BaseLLM
 from mistralai import Mistral
-from autobyteus.llm.utils.messages import
+from autobyteus.llm.utils.messages import Message, MessageRole
 from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, get_mime_type, is_valid_image_path

-# Configure logger
 logger = logging.getLogger(__name__)

+async def _format_mistral_messages(messages: List[Message]) -> List[Dict[str, Any]]:
+    """Formats a list of internal Message objects into a list of dictionaries for the Mistral API."""
+    mistral_messages = []
+    for msg in messages:
+        # Skip empty messages from non-system roles as Mistral API may reject them
+        if not msg.content and not msg.image_urls and msg.role != MessageRole.SYSTEM:
+            continue
+
+        content: Union[str, List[Dict[str, Any]]]
+
+        if msg.image_urls:
+            content_parts: List[Dict[str, Any]] = []
+            if msg.content:
+                content_parts.append({"type": "text", "text": msg.content})
+
+            image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+            try:
+                base64_images = await asyncio.gather(*image_tasks)
+                for i, b64_image in enumerate(base64_images):
+                    original_url = msg.image_urls[i]
+                    mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                    data_uri = f"data:{mime_type};base64,{b64_image}"
+
+                    # Mistral's format for image parts
+                    content_parts.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": data_uri
+                        }
+                    })
+            except Exception as e:
+                logger.error(f"Error processing images for Mistral: {e}")
+
+            if msg.audio_urls:
+                logger.warning("MistralLLM does not yet support audio; skipping.")
+            if msg.video_urls:
+                logger.warning("MistralLLM does not yet support video; skipping.")
+
+            content = content_parts
+        else:
+            content = msg.content or ""
+
+        mistral_messages.append({"role": msg.role.value, "content": content})
+
+    return mistral_messages
+
+
 class MistralLLM(BaseLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        # Provide defaults if not specified
         if model is None:
-            model = LLMModel
+            model = LLMModel['mistral-large']
         if llm_config is None:
             llm_config = LLMConfig()

         super().__init__(model=model, llm_config=llm_config)
-        self.
+        self.http_client = httpx.AsyncClient()
+        self.client: Mistral = self._initialize()
         logger.info(f"MistralLLM initialized with model: {self.model}")

-
-    def initialize(cls):
+    def _initialize(self) -> Mistral:
         mistral_api_key = os.environ.get("MISTRAL_API_KEY")
         if not mistral_api_key:
             logger.error("MISTRAL_API_KEY environment variable is not set")
-            raise ValueError(
-                "MISTRAL_API_KEY environment variable is not set. "
-                "Please set this variable in your environment."
-            )
+            raise ValueError("MISTRAL_API_KEY environment variable is not set.")
         try:
-            return Mistral(api_key=mistral_api_key)
+            return Mistral(api_key=mistral_api_key, client=self.http_client)
         except Exception as e:
             logger.error(f"Failed to initialize Mistral client: {str(e)}")
             raise ValueError(f"Failed to initialize Mistral client: {str(e)}")

-    def _create_token_usage(self, usage_data:
+    def _create_token_usage(self, usage_data: Any) -> TokenUsage:
         """Convert Mistral usage data to TokenUsage format."""
         return TokenUsage(
             prompt_tokens=usage_data.prompt_tokens,
@@ -48,26 +94,26 @@ class MistralLLM(BaseLLM):
         )

     async def _send_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> CompleteResponse:
         self.add_user_message(user_message)
-
+
         try:
-            mistral_messages =
+            mistral_messages = await _format_mistral_messages(self.messages)

-            chat_response = self.client.chat.
+            chat_response = await self.client.chat.complete_async(
                 model=self.model.value,
                 messages=mistral_messages,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                top_p=self.config.top_p,
             )

-            assistant_message = chat_response.choices.message.content
+            assistant_message = chat_response.choices[0].message.content
             self.add_assistant_message(assistant_message)

-
-
-            if hasattr(chat_response, 'usage') and chat_response.usage:
-                token_usage = self._create_token_usage(chat_response.usage)
-                logger.debug(f"Token usage recorded: {token_usage}")
+            token_usage = self._create_token_usage(chat_response.usage)
+            logger.debug(f"Token usage recorded: {token_usage}")

             return CompleteResponse(
                 content=assistant_message,
@@ -78,48 +124,48 @@ class MistralLLM(BaseLLM):
             raise ValueError(f"Error in Mistral API call: {str(e)}")

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)

+        accumulated_message = ""
+        final_usage = None
+
         try:
-            mistral_messages =
-
-            stream =
+            mistral_messages = await _format_mistral_messages(self.messages)
+
+            stream = self.client.chat.stream_async(
                 model=self.model.value,
                 messages=mistral_messages,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                top_p=self.config.top_p,
             )

-            accumulated_message = ""
-
             async for chunk in stream:
-                if chunk.
-                    token = chunk.
+                if chunk.choices and chunk.choices[0].delta.content is not None:
+                    token = chunk.choices[0].delta.content
                     accumulated_message += token

-
-
-
-
-
-
-
-
-
-
-
-
-                        usage=token_usage
-                    )
-
-            # After streaming is complete, store the full message
+                    yield ChunkResponse(content=token, is_complete=False)
+
+                if hasattr(chunk, 'usage') and chunk.usage:
+                    final_usage = self._create_token_usage(chunk.usage)
+
+            # Yield the final chunk with usage data
+            yield ChunkResponse(
+                content="",
+                is_complete=True,
+                usage=final_usage
+            )
+
             self.add_assistant_message(accumulated_message)
         except Exception as e:
             logger.error(f"Error in Mistral API streaming call: {str(e)}")
             raise ValueError(f"Error in Mistral API streaming call: {str(e)}")

     async def cleanup(self):
-        # Clean up any resources if needed
         logger.debug("Cleaning up MistralLLM instance")
-        self.
-
+        if self.http_client and not self.http_client.is_closed:
+            await self.http_client.aclose()
+        await super().cleanup()
autobyteus/llm/api/nvidia_llm.py
CHANGED

@@ -8,6 +8,7 @@ from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.messages import MessageRole, Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage

 logger = logging.getLogger(__name__)

@@ -38,11 +39,11 @@ class NvidiaLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Failed to initialize Nvidia client: {str(e)}")

-    async def _send_user_message_to_llm(self, user_message:
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
         try:
             completion = self.client.chat.completions.create(
-                model=self.model,
+                model=self.model.value,
                 messages=[msg.to_dict() for msg in self.messages],
                 temperature=0,
                 top_p=1,
@@ -65,12 +66,12 @@ class NvidiaLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Error in Nvidia API call: {str(e)}")

-    async def
+    async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
         complete_response = ""
         try:
             completion = self.client.chat.completions.create(
-                model=self.model,
+                model=self.model.value,
                 messages=[msg.to_dict() for msg in self.messages],
                 temperature=0,
                 top_p=1,
@@ -104,4 +105,4 @@ class NvidiaLLM(BaseLLM):
             raise ValueError(f"Error in Nvidia API streaming call: {str(e)}")

     async def cleanup(self):
-        super().cleanup()
+        await super().cleanup()
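
The `cleanup` fix is the usual async pitfall: calling `super().cleanup()` without `await` only creates a coroutine object, so the parent's teardown never runs. A minimal sketch with illustrative class names (not autobyteus APIs):

```python
import asyncio


class BaseClient:
    async def cleanup(self) -> None:
        # Pretend to release network resources
        await asyncio.sleep(0)
        print("base cleanup ran")


class BrokenClient(BaseClient):
    async def cleanup(self) -> None:
        # Bug: builds a coroutine but never runs it (RuntimeWarning: never awaited)
        super().cleanup()


class FixedClient(BaseClient):
    async def cleanup(self) -> None:
        # Fix mirrored in this release: await the parent coroutine
        await super().cleanup()


async def main() -> None:
    await BrokenClient().cleanup()   # base cleanup is silently skipped
    await FixedClient().cleanup()    # prints "base cleanup ran"


asyncio.run(main())
```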
autobyteus/llm/api/ollama_llm.py
CHANGED

@@ -1,21 +1,22 @@
-from typing import Dict, Optional, List, AsyncGenerator
+from typing import Dict, Optional, List, AsyncGenerator, Any
 from ollama import AsyncClient, ChatResponse, ResponseError
+from ollama import Image  # FIX: Import the Image type from the ollama library
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.base_llm import BaseLLM
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import
+from autobyteus.llm.utils.messages import Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64
 import logging
 import asyncio
 import httpx
-import os

 logger = logging.getLogger(__name__)

 class OllamaLLM(BaseLLM):
     def __init__(self, model: LLMModel, llm_config: LLMConfig):
-        # The host URL is now passed via the model object, decoupling from environment variables here.
         if not model.host_url:
             raise ValueError("OllamaLLM requires a host_url to be set in its LLMModel object.")

@@ -26,16 +27,41 @@ class OllamaLLM(BaseLLM):
         super().__init__(model=model, llm_config=llm_config)
         logger.info(f"OllamaLLM initialized with model: {self.model.model_identifier}")

-    async def
+    async def _format_ollama_messages(self) -> List[Dict[str, Any]]:
+        """
+        Formats the conversation history for the Ollama API, including multimodal content.
+        """
+        formatted_messages = []
+        for msg in self.messages:
+            msg_dict = {
+                "role": msg.role.value,
+                "content": msg.content or ""
+            }
+            if msg.image_urls:
+                try:
+                    # Concurrently process all images using the centralized utility
+                    image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+                    prepared_base64_images = await asyncio.gather(*image_tasks)
+                    if prepared_base64_images:
+                        # FIX: Wrap each base64 string in the official ollama.Image object
+                        msg_dict["images"] = [Image(value=b64_string) for b64_string in prepared_base64_images]
+                except Exception as e:
+                    logger.error(f"Error processing images for Ollama, skipping them. Error: {e}")
+
+            formatted_messages.append(msg_dict)
+        return formatted_messages
+
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
+
         try:
+            formatted_messages = await self._format_ollama_messages()
             response: ChatResponse = await self.client.chat(
                 model=self.model.value,
-                messages=
+                messages=formatted_messages
             )
             assistant_message = response['message']['content']

-            # Detect and process reasoning content using <think> markers
             reasoning_content = None
             main_content = assistant_message
             if "<think>" in assistant_message and "</think>" in assistant_message:
@@ -69,7 +95,7 @@ class OllamaLLM(BaseLLM):
             raise

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
         accumulated_main = ""
@@ -78,17 +104,16 @@ class OllamaLLM(BaseLLM):
         final_response = None

         try:
+            formatted_messages = await self._format_ollama_messages()
             async for part in await self.client.chat(
                 model=self.model.value,
-                messages=
+                messages=formatted_messages,
                 stream=True
             ):
                 token = part['message']['content']

-                # Simple state machine for <think> tags
                 if "<think>" in token:
                     in_reasoning = True
-                    # In case token is like "...</think><think>...", handle it
                     parts = token.split("<think>")
                     token = parts[-1]

@@ -130,4 +155,4 @@ class OllamaLLM(BaseLLM):
             raise

     async def cleanup(self):
-        await super().cleanup()
+        await super().cleanup()
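
As a rough illustration of what the new `_format_ollama_messages` helper does, the sketch below converts local image files to base64 concurrently and attaches them under the `images` key of an Ollama-style chat message. `image_source_to_base64` here is a simplified stand-in for the package's `media_payload_formatter` utility, and the real code additionally wraps each string in `ollama.Image(value=...)` as the diff shows.

```python
import asyncio
import base64
from pathlib import Path
from typing import Any, Dict, List


async def image_source_to_base64(path: str) -> str:
    # Stand-in: read a local file off the event loop and return its base64 text
    data = await asyncio.to_thread(Path(path).read_bytes)
    return base64.b64encode(data).decode()


async def build_ollama_message(role: str, content: str, image_paths: List[str]) -> Dict[str, Any]:
    msg: Dict[str, Any] = {"role": role, "content": content}
    if image_paths:
        # Convert all images concurrently, mirroring _format_ollama_messages
        msg["images"] = await asyncio.gather(*(image_source_to_base64(p) for p in image_paths))
    return msg


if __name__ == "__main__":
    print(asyncio.run(build_ollama_message("user", "Describe these photos.", [])))
```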
autobyteus/llm/api/openai_compatible_llm.py
CHANGED

@@ -1,20 +1,62 @@
 import logging
 import os
 from abc import ABC
-from typing import Optional, List, AsyncGenerator
+from typing import Optional, List, AsyncGenerator, Dict, Any
 from openai import OpenAI
 from openai.types.completion_usage import CompletionUsage
 from openai.types.chat import ChatCompletionChunk
+import asyncio

 from autobyteus.llm.base_llm import BaseLLM
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, create_data_uri, get_mime_type, is_valid_image_path
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.messages import Message

 logger = logging.getLogger(__name__)

+async def _format_openai_history(messages: List[Message]) -> List[Dict[str, Any]]:
+    """A local async function to format history for the OpenAI SDK, handling image processing."""
+    formatted_messages = []
+    for msg in messages:
+        # For multimodal messages, build the content list of parts
+        if msg.image_urls or msg.audio_urls or msg.video_urls:
+            content_parts: List[Dict[str, Any]] = []
+            if msg.content:
+                content_parts.append({"type": "text", "text": msg.content})
+
+            image_tasks = []
+            if msg.image_urls:
+                for url in msg.image_urls:
+                    # Create an async task for each image to process them concurrently
+                    image_tasks.append(image_source_to_base64(url))
+
+            try:
+                base64_images = await asyncio.gather(*image_tasks)
+                for i, b64_image in enumerate(base64_images):
+                    original_url = msg.image_urls[i]
+                    # Determine mime type from original path if possible, otherwise default
+                    mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                    content_parts.append(create_data_uri(mime_type, b64_image))
+            except Exception as e:
+                logger.error(f"Error processing one or more images: {e}")
+
+            # Placeholder for future audio/video processing
+            if msg.audio_urls:
+                logger.warning("OpenAI compatible layer does not yet support audio; skipping.")
+            if msg.video_urls:
+                logger.warning("OpenAI compatible layer does not yet support video; skipping.")
+
+            formatted_messages.append({"role": msg.role.value, "content": content_parts})
+        else:
+            # For text-only messages, use the simple string format
+            formatted_messages.append({"role": msg.role.value, "content": msg.content})
+    return formatted_messages
+
+
 class OpenAICompatibleLLM(BaseLLM, ABC):
     def __init__(
         self,
@@ -24,18 +66,6 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         base_url: str,
         api_key_default: Optional[str] = None
     ):
-        """
-        Initializes an OpenAI-compatible LLM.
-
-        Args:
-            model (LLMModel): The model to use.
-            llm_config (LLMConfig): Configuration for the LLM.
-            api_key_env_var (str): The name of the environment variable for the API key.
-            base_url (str): The base URL for the API.
-            api_key_default (Optional[str], optional): A default API key to use if the
-                environment variable is not set.
-                Defaults to None.
-        """
         api_key = os.getenv(api_key_env_var)
         if not api_key:
             if api_key_default:
@@ -49,13 +79,11 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         logger.info(f"Initialized OpenAI compatible client with base_url: {base_url}")

         super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
+        self.max_tokens = 8000

     def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
         if not usage_data:
             return None
-
         return TokenUsage(
             prompt_tokens=usage_data.prompt_tokens,
             completion_tokens=usage_data.completion_tokens,
@@ -63,53 +91,41 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         )

     async def _send_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> CompleteResponse:
-
-
-        Supports optional reasoning content if provided in the response.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
+        self.add_user_message(user_message)
+
         try:
+            formatted_messages = await _format_openai_history(self.messages)
             logger.info(f"Sending request to {self.model.provider.value} API")
-
-
-
-
-
+
+            params: Dict[str, Any] = {
+                "model": self.model.value,
+                "messages": formatted_messages,
+            }
+
+            if self.config.uses_max_completion_tokens:
+                params["max_completion_tokens"] = self.max_tokens
+            else:
+                params["max_tokens"] = self.max_tokens
+
+            response = self.client.chat.completions.create(**params)
             full_message = response.choices[0].message

-            #
+            # --- PRESERVED ORIGINAL LOGIC ---
             reasoning = None
             if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
                 reasoning = full_message.reasoning_content
             elif "reasoning_content" in full_message and full_message["reasoning_content"]:
                 reasoning = full_message["reasoning_content"]

-            # Extract main content
             main_content = ""
             if hasattr(full_message, "content") and full_message.content:
                 main_content = full_message.content
             elif "content" in full_message and full_message["content"]:
                 main_content = full_message["content"]
-
+            # --- END PRESERVED LOGIC ---
+
             self.add_assistant_message(main_content, reasoning_content=reasoning)

             token_usage = self._create_token_usage(response.usage)
@@ -125,43 +141,30 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
             raise ValueError(f"Error in {self.model.provider.value} API request: {str(e)}")

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
-
-        Streams the response from an OpenAI-compatible API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
+        self.add_user_message(user_message)

-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
         accumulated_reasoning = ""
         accumulated_content = ""

         try:
+            formatted_messages = await _format_openai_history(self.messages)
             logger.info(f"Starting streaming request to {self.model.provider.value} API")
-
-
-
-
-            stream
-            stream_options
-
+
+            params: Dict[str, Any] = {
+                "model": self.model.value,
+                "messages": formatted_messages,
+                "stream": True,
+                "stream_options": {"include_usage": True},
+            }
+
+            if self.config.uses_max_completion_tokens:
+                params["max_completion_tokens"] = self.max_tokens
+            else:
+                params["max_tokens"] = self.max_tokens
+
+            stream = self.client.chat.completions.create(**params)

             for chunk in stream:
                 chunk: ChatCompletionChunk
@@ -170,25 +173,23 @@ class OpenAICompatibleLLM(BaseLLM, ABC):

                 delta = chunk.choices[0].delta

-                #
-                reasoning_chunk =
+                # --- PRESERVED ORIGINAL LOGIC (adapted for streaming) ---
+                reasoning_chunk = None
+                if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                    reasoning_chunk = delta.reasoning_content
+                elif isinstance(delta, dict) and "reasoning_content" in delta and delta["reasoning_content"]:
+                    reasoning_chunk = delta["reasoning_content"]
+
                 if reasoning_chunk:
                     accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-
-                        reasoning=reasoning_chunk
-                    )
+                    yield ChunkResponse(content="", reasoning=reasoning_chunk)
+                # --- END PRESERVED LOGIC ---

-                # Process main content tokens
                 main_token = delta.content
                 if main_token:
                     accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
+                    yield ChunkResponse(content=main_token, reasoning=None)

-                # Yield token usage if available in the final chunk
                 if hasattr(chunk, "usage") and chunk.usage is not None:
                     token_usage = self._create_token_usage(chunk.usage)
                     yield ChunkResponse(
@@ -198,7 +199,6 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
                         usage=token_usage
                     )

-            # After streaming, add the fully accumulated assistant message to history
             self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
             logger.info(f"Completed streaming response from {self.model.provider.value} API")

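
The new request-building logic in `openai_compatible_llm.py` switches between the `max_tokens` and `max_completion_tokens` parameters based on a `uses_max_completion_tokens` config flag, since some newer OpenAI endpoints (the reasoning models) accept only the latter. A standalone sketch of that branching, using an illustrative helper name rather than the package's actual method:

```python
from typing import Any, Dict, List


def build_chat_params(model: str, messages: List[Dict[str, Any]], max_tokens: int,
                      uses_max_completion_tokens: bool, stream: bool = False) -> Dict[str, Any]:
    """Choose the token-limit keyword the target endpoint expects."""
    params: Dict[str, Any] = {"model": model, "messages": messages}
    if stream:
        params["stream"] = True
        params["stream_options"] = {"include_usage": True}
    # Reasoning-model endpoints reject max_tokens and require max_completion_tokens
    if uses_max_completion_tokens:
        params["max_completion_tokens"] = max_tokens
    else:
        params["max_tokens"] = max_tokens
    return params


if __name__ == "__main__":
    msgs = [{"role": "user", "content": "hello"}]
    print(build_chat_params("gpt-4o-mini", msgs, 8000, uses_max_completion_tokens=False))
    print(build_chat_params("o1-mini", msgs, 8000, uses_max_completion_tokens=True, stream=True))
```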
autobyteus/llm/autobyteus_provider.py
CHANGED

@@ -49,7 +49,7 @@ class AutobyteusModelProvider:
         try:
             # Instantiate client for this specific host
             client = AutobyteusClient(server_url=host_url)
-            response = client.
+            response = client.get_available_llm_models_sync()
         except Exception as e:
             logger.warning(f"Could not connect or fetch models from Autobyteus server at {host_url}: {e}")
             continue