autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  3. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  4. autobyteus/agent/message/__init__.py +7 -5
  5. autobyteus/agent/message/agent_input_user_message.py +6 -16
  6. autobyteus/agent/message/context_file.py +24 -24
  7. autobyteus/agent/message/context_file_type.py +29 -8
  8. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  9. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  10. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  11. autobyteus/agent/tool_invocation.py +2 -1
  12. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  13. autobyteus/agent_team/context/agent_team_config.py +1 -0
  14. autobyteus/llm/api/autobyteus_llm.py +33 -33
  15. autobyteus/llm/api/bedrock_llm.py +13 -5
  16. autobyteus/llm/api/claude_llm.py +13 -27
  17. autobyteus/llm/api/gemini_llm.py +108 -42
  18. autobyteus/llm/api/groq_llm.py +4 -3
  19. autobyteus/llm/api/mistral_llm.py +97 -51
  20. autobyteus/llm/api/nvidia_llm.py +6 -5
  21. autobyteus/llm/api/ollama_llm.py +37 -12
  22. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  23. autobyteus/llm/autobyteus_provider.py +1 -1
  24. autobyteus/llm/base_llm.py +42 -139
  25. autobyteus/llm/extensions/base_extension.py +6 -6
  26. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  27. autobyteus/llm/llm_factory.py +106 -4
  28. autobyteus/llm/token_counter/token_counter_factory.py +1 -1
  29. autobyteus/llm/user_message.py +43 -35
  30. autobyteus/llm/utils/llm_config.py +34 -18
  31. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  32. autobyteus/llm/utils/messages.py +32 -25
  33. autobyteus/llm/utils/response_types.py +9 -3
  34. autobyteus/llm/utils/token_usage.py +6 -5
  35. autobyteus/multimedia/__init__.py +31 -0
  36. autobyteus/multimedia/audio/__init__.py +11 -0
  37. autobyteus/multimedia/audio/api/__init__.py +4 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  40. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  41. autobyteus/multimedia/audio/audio_model.py +96 -0
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  43. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  44. autobyteus/multimedia/image/__init__.py +11 -0
  45. autobyteus/multimedia/image/api/__init__.py +9 -0
  46. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  47. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  48. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  49. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  50. autobyteus/multimedia/image/base_image_client.py +67 -0
  51. autobyteus/multimedia/image/image_client_factory.py +118 -0
  52. autobyteus/multimedia/image/image_model.py +96 -0
  53. autobyteus/multimedia/providers.py +5 -0
  54. autobyteus/multimedia/runtimes.py +8 -0
  55. autobyteus/multimedia/utils/__init__.py +10 -0
  56. autobyteus/multimedia/utils/api_utils.py +19 -0
  57. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  58. autobyteus/multimedia/utils/response_types.py +13 -0
  59. autobyteus/tools/__init__.py +3 -0
  60. autobyteus/tools/multimedia/__init__.py +8 -0
  61. autobyteus/tools/multimedia/audio_tools.py +116 -0
  62. autobyteus/tools/multimedia/image_tools.py +186 -0
  63. autobyteus/tools/tool_category.py +1 -0
  64. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  65. autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
  66. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  67. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  68. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
  69. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
  70. examples/run_browser_agent.py +1 -1
  71. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  72. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
  73. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
  74. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
autobyteus/agent/context/agent_config.py (+6 -1)

@@ -37,6 +37,7 @@ class AgentConfig:
  system_prompt: Optional[str] = None,
  tools: Optional[List['BaseTool']] = None,
  auto_execute_tools: bool = True,
+ use_xml_tool_format: bool = False,
  input_processors: Optional[List['BaseAgentUserInputMessageProcessor']] = None,
  llm_response_processors: Optional[List['BaseLLMResponseProcessor']] = None,
  system_prompt_processors: Optional[List['BaseSystemPromptProcessor']] = None,
@@ -57,6 +58,8 @@ class AgentConfig:
  llm_instance's config will be used as the base.
  tools: An optional list of pre-initialized tool instances (subclasses of BaseTool).
  auto_execute_tools: If True, the agent will execute tools without approval.
+ use_xml_tool_format: If True, forces the agent to use XML format for tool
+ definitions and parsing, overriding provider defaults.
  input_processors: A list of input processor instances.
  llm_response_processors: A list of LLM response processor instances.
  system_prompt_processors: A list of system prompt processor instances.
@@ -74,6 +77,7 @@ class AgentConfig:
  self.tools = tools or []
  self.workspace = workspace
  self.auto_execute_tools = auto_execute_tools
+ self.use_xml_tool_format = use_xml_tool_format
  self.input_processors = input_processors or []
  self.llm_response_processors = llm_response_processors if llm_response_processors is not None else list(self.DEFAULT_LLM_RESPONSE_PROCESSORS)
  self.system_prompt_processors = system_prompt_processors if system_prompt_processors is not None else list(self.DEFAULT_SYSTEM_PROMPT_PROCESSORS)
@@ -81,7 +85,7 @@ class AgentConfig:
  self.phase_hooks = phase_hooks or []
  self.initial_custom_data = initial_custom_data

- logger.debug(f"AgentConfig created for name '{self.name}', role '{self.role}'.")
+ logger.debug(f"AgentConfig created for name '{self.name}', role '{self.role}'. XML tool format override: {self.use_xml_tool_format}")

  def copy(self) -> 'AgentConfig':
  """
@@ -98,6 +102,7 @@ class AgentConfig:
  system_prompt=self.system_prompt,
  tools=self.tools.copy(), # Shallow copy the list, but reference the original tool instances
  auto_execute_tools=self.auto_execute_tools,
+ use_xml_tool_format=self.use_xml_tool_format,
  input_processors=self.input_processors.copy(), # Shallow copy the list
  llm_response_processors=self.llm_response_processors.copy(), # Shallow copy the list
  system_prompt_processors=self.system_prompt_processors.copy(), # Shallow copy the list
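The new use_xml_tool_format flag is a plain constructor argument, so opting an agent into XML tool manifests is a one-line change. A minimal sketch, assuming the name and role keyword arguments implied by the logging above (other constructor parameters are omitted and the import path follows the file location):

    from autobyteus.agent.context.agent_config import AgentConfig

    config = AgentConfig(
        name="demo_agent",             # assumed keyword; not shown in this diff hunk
        role="assistant",              # assumed keyword; not shown in this diff hunk
        use_xml_tool_format=True,      # new in 1.1.6: force XML tool definitions and parsing
    )

    # copy() now carries the override along with the other settings.
    assert config.copy().use_xml_tool_format is True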
autobyteus/agent/handlers/llm_user_message_ready_event_handler.py (+30 -7)

@@ -1,7 +1,7 @@
  # file: autobyteus/autobyteus/agent/handlers/llm_user_message_ready_event_handler.py
  import logging
  import traceback
- from typing import TYPE_CHECKING, cast, Optional
+ from typing import TYPE_CHECKING, cast, Optional, List

  from autobyteus.agent.handlers.base_event_handler import AgentEventHandler
  from autobyteus.agent.events import LLMUserMessageReadyEvent, LLMCompleteResponseReceivedEvent
@@ -53,6 +53,9 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
  complete_response_text = ""
  complete_reasoning_text = ""
  token_usage: Optional[TokenUsage] = None
+ complete_image_urls: List[str] = []
+ complete_audio_urls: List[str] = []
+ complete_video_urls: List[str] = []

  notifier: Optional['AgentExternalEventNotifier'] = None
  if context.phase_manager:
@@ -72,9 +75,19 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
  if chunk_response.reasoning:
  complete_reasoning_text += chunk_response.reasoning

- if chunk_response.is_complete and chunk_response.usage:
- token_usage = chunk_response.usage
- logger.debug(f"Agent '{agent_id}' received final chunk with token usage: {token_usage}")
+ if chunk_response.is_complete:
+ if chunk_response.usage:
+ token_usage = chunk_response.usage
+ logger.debug(f"Agent '{agent_id}' received final chunk with token usage: {token_usage}")
+ if chunk_response.image_urls:
+ complete_image_urls.extend(chunk_response.image_urls)
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.image_urls)} image URLs.")
+ if chunk_response.audio_urls:
+ complete_audio_urls.extend(chunk_response.audio_urls)
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.audio_urls)} audio URLs.")
+ if chunk_response.video_urls:
+ complete_video_urls.extend(chunk_response.video_urls)
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.video_urls)} video URLs.")

  if notifier:
  try:
@@ -121,20 +134,30 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
  logger.info(f"Agent '{agent_id}' enqueued LLMCompleteResponseReceivedEvent with error details from LLMUserMessageReadyEventHandler.")
  return

- # Add message to history with reasoning
+ # Add message to history with reasoning and multimodal data
  history_entry = {"role": "assistant", "content": complete_response_text}
  if complete_reasoning_text:
  history_entry["reasoning"] = complete_reasoning_text
+ if complete_image_urls:
+ history_entry["image_urls"] = complete_image_urls
+ if complete_audio_urls:
+ history_entry["audio_urls"] = complete_audio_urls
+ if complete_video_urls:
+ history_entry["video_urls"] = complete_video_urls
  context.state.add_message_to_history(history_entry)

- # Create complete response with reasoning
+ # Create complete response with reasoning and multimodal data
  complete_response_obj = CompleteResponse(
  content=complete_response_text,
  reasoning=complete_reasoning_text,
- usage=token_usage
+ usage=token_usage,
+ image_urls=complete_image_urls,
+ audio_urls=complete_audio_urls,
+ video_urls=complete_video_urls
  )
  llm_complete_event = LLMCompleteResponseReceivedEvent(
  complete_response=complete_response_obj
  )
  await context.input_event_queues.enqueue_internal_system_event(llm_complete_event)
  logger.info(f"Agent '{agent_id}' enqueued LLMCompleteResponseReceivedEvent from LLMUserMessageReadyEventHandler.")
+
autobyteus/agent/handlers/user_input_message_event_handler.py (+22 -25)

@@ -3,14 +3,14 @@ import logging
  from typing import TYPE_CHECKING

  from autobyteus.agent.handlers.base_event_handler import AgentEventHandler
- from autobyteus.agent.events import UserMessageReceivedEvent, LLMUserMessageReadyEvent
- from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
+ from autobyteus.agent.events import UserMessageReceivedEvent, LLMUserMessageReadyEvent
+ from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
  from autobyteus.agent.input_processor import BaseAgentUserInputMessageProcessor
- from autobyteus.llm.user_message import LLMUserMessage
+ from autobyteus.agent.message.multimodal_message_builder import build_llm_user_message


  if TYPE_CHECKING:
- from autobyteus.agent.context import AgentContext
+ from autobyteus.agent.context import AgentContext
  from autobyteus.agent.events.notifiers import AgentExternalEventNotifier

  logger = logging.getLogger(__name__)
@@ -18,24 +18,23 @@ logger = logging.getLogger(__name__)
  class UserInputMessageEventHandler(AgentEventHandler):
  """
  Handles UserMessageReceivedEvents by first applying any configured
- AgentUserInputMessageProcessors (provided as instances) to the AgentInputUserMessage,
- then converting the processed message into an LLMUserMessage, and finally
+ AgentUserInputMessageProcessors, then using the multimodal_message_builder
+ to convert the processed message into an LLMUserMessage, and finally
  enqueuing an LLMUserMessageReadyEvent for further processing by the LLM.
- It also checks for metadata to emit special notifications for system-generated tasks.
  """

  def __init__(self):
  logger.info("UserInputMessageEventHandler initialized.")

  async def handle(self,
- event: UserMessageReceivedEvent,
+ event: UserMessageReceivedEvent,
  context: 'AgentContext') -> None:
- if not isinstance(event, UserMessageReceivedEvent):
+ if not isinstance(event, UserMessageReceivedEvent):
  logger.warning(f"UserInputMessageEventHandler received non-UserMessageReceivedEvent: {type(event)}. Skipping.")
  return

- original_agent_input_user_msg: AgentInputUserMessage = event.agent_input_user_message
-
+ original_agent_input_user_msg: AgentInputUserMessage = event.agent_input_user_message
+
  # --- NEW LOGIC: Check metadata for system-generated tasks and notify TUI ---
  if original_agent_input_user_msg.metadata.get('source') == 'system_task_notifier':
  if context.phase_manager:
@@ -47,11 +46,11 @@ class UserInputMessageEventHandler(AgentEventHandler):
  notifier.notify_agent_data_system_task_notification_received(notification_data)
  logger.info(f"Agent '{context.agent_id}' emitted system task notification for TUI.")
  # --- END NEW LOGIC ---
-
- processed_agent_input_user_msg: AgentInputUserMessage = original_agent_input_user_msg
-
- logger.info(f"Agent '{context.agent_id}' handling UserMessageReceivedEvent: '{original_agent_input_user_msg.content}'")
-
+
+ processed_agent_input_user_msg: AgentInputUserMessage = original_agent_input_user_msg
+
+ logger.info(f"Agent '{context.agent_id}' handling UserMessageReceivedEvent: '{original_agent_input_user_msg.content}'")
+
  processor_instances = context.config.input_processors
  if processor_instances:
  processor_names = [p.get_name() for p in processor_instances]
@@ -62,14 +61,14 @@ class UserInputMessageEventHandler(AgentEventHandler):
  if not isinstance(processor_instance, BaseAgentUserInputMessageProcessor):
  logger.error(f"Agent '{context.agent_id}': Invalid input processor type in config: {type(processor_instance)}. Skipping.")
  continue
-
+
  processor_name_for_log = processor_instance.get_name()
  logger.debug(f"Agent '{context.agent_id}': Applying input processor '{processor_name_for_log}'.")
  msg_before_this_processor = processed_agent_input_user_msg
  # Pass the original event to the processor
  processed_agent_input_user_msg = await processor_instance.process(
- message=msg_before_this_processor,
- context=context,
+ message=msg_before_this_processor,
+ context=context,
  triggering_event=event
  )
  logger.info(f"Agent '{context.agent_id}': Input processor '{processor_name_for_log}' applied successfully.")
@@ -81,12 +80,10 @@ class UserInputMessageEventHandler(AgentEventHandler):
  else:
  logger.debug(f"Agent '{context.agent_id}': No input processors configured in agent config.")

- llm_user_message = LLMUserMessage(
- content=processed_agent_input_user_msg.content,
- image_urls=processed_agent_input_user_msg.image_urls
- )
+ # --- Refactored: Use the dedicated builder ---
+ llm_user_message = build_llm_user_message(processed_agent_input_user_msg)

- llm_user_message_ready_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
+ llm_user_message_ready_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
  await context.input_event_queues.enqueue_internal_system_event(llm_user_message_ready_event)
-
+
  logger.info(f"Agent '{context.agent_id}' processed AgentInputUserMessage and enqueued LLMUserMessageReadyEvent.")
autobyteus/agent/message/__init__.py (+7 -5)

@@ -9,12 +9,14 @@ from .agent_input_user_message import AgentInputUserMessage
  from .send_message_to import SendMessageTo
  from .context_file import ContextFile
  from .context_file_type import ContextFileType
+ from .multimodal_message_builder import build_llm_user_message

  __all__ = [
- "InterAgentMessage",
- "InterAgentMessageType",
- "AgentInputUserMessage",
+ "InterAgentMessage",
+ "InterAgentMessageType",
+ "AgentInputUserMessage",
  "SendMessageTo",
- "ContextFile",
- "ContextFileType",
+ "ContextFile",
+ "ContextFileType",
+ "build_llm_user_message",
  ]
autobyteus/agent/message/agent_input_user_message.py (+6 -16)

@@ -8,21 +8,18 @@ from .context_file import ContextFile # Import the new ContextFile dataclass
  logger = logging.getLogger(__name__)

  @dataclass
- class AgentInputUserMessage:
+ class AgentInputUserMessage:
  """
  Represents a message received from an external user interacting with the agent system.
- This is a simple dataclass. It includes support for a list of ContextFile objects,
- allowing users to provide various documents as context.
+ This is a simple dataclass. It includes support for a list of ContextFile objects,
+ allowing users to provide various documents and media as context via a single list.
  """
  content: str
- image_urls: Optional[List[str]] = field(default=None) # Basic list of strings
  context_files: Optional[List[ContextFile]] = field(default=None)
  metadata: Dict[str, Any] = field(default_factory=dict)

  def __post_init__(self):
  # Basic type validation that dataclasses don't do automatically for mutable defaults or complex types
- if self.image_urls is not None and not (isinstance(self.image_urls, list) and all(isinstance(url, str) for url in self.image_urls)):
- raise TypeError("AgentInputUserMessage 'image_urls' must be a list of strings if provided.")
  if self.context_files is not None and not (isinstance(self.context_files, list) and all(isinstance(cf, ContextFile) for cf in self.context_files)):
  raise TypeError("AgentInputUserMessage 'context_files' must be a list of ContextFile objects if provided.")
  if not isinstance(self.metadata, dict): # Should be caught by default_factory, but good practice
@@ -34,7 +31,7 @@ class AgentInputUserMessage:
  num_context_files = len(self.context_files) if self.context_files else 0
  logger.debug(
  f"AgentInputUserMessage initialized. Content: '{self.content[:50]}...', "
- f"Image URLs: {self.image_urls}, Num ContextFiles: {num_context_files}, "
+ f"Num ContextFiles: {num_context_files}, "
  f"Metadata keys: {list(self.metadata.keys())}"
  )

@@ -47,7 +44,6 @@ class AgentInputUserMessage:

  return {
  "content": self.content,
- "image_urls": self.image_urls,
  "context_files": context_files_dict_list,
  "metadata": self.metadata,
  }
@@ -59,31 +55,25 @@ class AgentInputUserMessage:
  if not isinstance(content, str): # Ensure content is string
  raise ValueError("AgentInputUserMessage 'content' in dictionary must be a string.")

- image_urls = data.get("image_urls")
- if image_urls is not None and not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
- raise ValueError("AgentInputUserMessage 'image_urls' in dictionary must be a list of strings if provided.")
-
  context_files_data = data.get("context_files")
  context_files_list: Optional[List[ContextFile]] = None
  if context_files_data is not None:
  if not isinstance(context_files_data, list):
  raise ValueError("AgentInputUserMessage 'context_files' in dictionary must be a list if provided.")
  context_files_list = [ContextFile.from_dict(cf_data) for cf_data in context_files_data]
-
+
  metadata = data.get("metadata", {})
  if not isinstance(metadata, dict):
  raise ValueError("AgentInputUserMessage 'metadata' in dictionary must be a dict if provided.")

  return cls(
  content=content,
- image_urls=image_urls,
  context_files=context_files_list,
  metadata=metadata
  )

  def __repr__(self) -> str:
  content_preview = f"{self.content[:100]}..." if len(self.content) > 100 else self.content
- images_repr = f", image_urls={self.image_urls}" if self.image_urls else ""

  if self.context_files:
  context_repr = f", context_files=[{len(self.context_files)} ContextFile(s)]"
@@ -93,4 +83,4 @@ class AgentInputUserMessage:
  meta_repr = f", metadata_keys={list(self.metadata.keys())}" if self.metadata else ""

  return (f"AgentInputUserMessage(content='{content_preview}'"
- f"{images_repr}{context_repr}{meta_repr})")
+ f"{context_repr}{meta_repr})")
autobyteus/agent/message/context_file.py (+24 -24)

@@ -3,6 +3,7 @@ import os
  import logging
  from typing import Optional, Dict, Any
  from dataclasses import dataclass, field
+ from urllib.parse import urlparse

  from .context_file_type import ContextFileType

@@ -12,10 +13,9 @@ logger = logging.getLogger(__name__)
  class ContextFile:
  """
  Represents a single context file provided to an agent.
- This is a simple dataclass, deferring path validation and file access
- to input processors.
+ The 'uri' can be a local file path or a network URL.
  """
- path: str
+ uri: str
  file_type: ContextFileType = ContextFileType.UNKNOWN
  file_name: Optional[str] = None
  metadata: Dict[str, Any] = field(default_factory=dict)
@@ -25,33 +25,33 @@ class ContextFile:
  Called after the dataclass's __init__ method.
  Used here to infer file_name and file_type if not provided or UNKNOWN.
  """
- if self.file_name is None and self.path:
+ if not isinstance(self.uri, str) or not self.uri:
+ raise TypeError(f"ContextFile uri must be a non-empty string, got {type(self.uri)}")
+
+ if self.file_name is None:
  try:
- self.file_name = os.path.basename(self.path)
+ # Use urlparse to correctly handle both URLs and local paths
+ parsed_path = urlparse(self.uri).path
+ self.file_name = os.path.basename(parsed_path)
  except Exception as e:
- logger.warning(f"Could not determine basename for path '{self.path}': {e}")
+ logger.warning(f"Could not determine basename for uri '{self.uri}': {e}")
  self.file_name = "unknown_file"

- if self.file_type == ContextFileType.UNKNOWN and self.path:
- inferred_type = ContextFileType.from_path(self.path)
+ if self.file_type == ContextFileType.UNKNOWN:
+ inferred_type = ContextFileType.from_path(self.uri)
  if inferred_type != ContextFileType.UNKNOWN:
  self.file_type = inferred_type
- logger.debug(f"Inferred file type for '{self.path}' as {self.file_type.value}")
+ logger.debug(f"Inferred file type for '{self.uri}' as {self.file_type.value}")
  else:
- logger.debug(f"Could not infer specific file type for '{self.path}', remaining UNKNOWN.")
-
- # Ensure path is a string
- if not isinstance(self.path, str):
- # This ideally should be caught by type hints earlier, but as a runtime safeguard:
- raise TypeError(f"ContextFile path must be a string, got {type(self.path)}")
-
+ logger.debug(f"Could not infer specific file type for '{self.uri}', remaining UNKNOWN.")
+
  if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"ContextFile initialized: path='{self.path}', type='{self.file_type.value}', name='{self.file_name}'")
+ logger.debug(f"ContextFile initialized: uri='{self.uri}', type='{self.file_type.value}', name='{self.file_name}'")

  def to_dict(self) -> Dict[str, Any]:
  """Serializes the ContextFile to a dictionary."""
  return {
- "path": self.path,
+ "uri": self.uri,
  "file_type": self.file_type.value, # Serialize enum to its value
  "file_name": self.file_name,
  "metadata": self.metadata,
@@ -60,23 +60,23 @@ class ContextFile:
  @classmethod
  def from_dict(cls, data: Dict[str, Any]) -> 'ContextFile':
  """Deserializes a ContextFile from a dictionary."""
- if not isinstance(data.get("path"), str):
- raise ValueError("ContextFile 'path' in dictionary must be a string.")
-
+ if not isinstance(data.get("uri"), str):
+ raise ValueError("ContextFile 'uri' in dictionary must be a string.")
+
  file_type_str = data.get("file_type", ContextFileType.UNKNOWN.value)
  try:
  file_type = ContextFileType(file_type_str)
  except ValueError:
  logger.warning(f"Invalid file_type string '{file_type_str}' in ContextFile data. Defaulting to UNKNOWN.")
  file_type = ContextFileType.UNKNOWN
-
+
  return cls(
- path=data["path"],
+ uri=data["uri"],
  file_type=file_type,
  file_name=data.get("file_name"),
  metadata=data.get("metadata", {})
  )

  def __repr__(self) -> str:
- return (f"ContextFile(path='{self.path}', file_name='{self.file_name}', "
+ return (f"ContextFile(uri='{self.uri}', file_name='{self.file_name}', "
  f"file_type='{self.file_type.value}', metadata_keys={list(self.metadata.keys())})")
autobyteus/agent/message/context_file_type.py (+29 -8)

@@ -1,5 +1,6 @@
  from enum import Enum
  import os
+ from urllib.parse import urlparse

  class ContextFileType(str, Enum):
  """
@@ -23,19 +24,25 @@ class ContextFileType(str, Enum):
  UNKNOWN = "unknown" # Fallback for unrecognized types

  @classmethod
- def from_path(cls, file_path: str) -> 'ContextFileType':
+ def from_path(cls, uri: str) -> 'ContextFileType':
  """
- Infers the ContextFileType from a file path based on its extension.
+ Infers the ContextFileType from a file path or URL based on its extension.
  """
- if not file_path or not isinstance(file_path, str):
+ if not uri or not isinstance(uri, str):
  return cls.UNKNOWN
-
- _, extension = os.path.splitext(file_path.lower())
-
+
+ try:
+ # Parse the URI to handle both file paths and URLs gracefully
+ parsed_path = urlparse(uri).path
+ _, extension = os.path.splitext(parsed_path.lower())
+ except Exception:
+ # Fallback for malformed URIs
+ _, extension = os.path.splitext(uri.lower())
+
  if extension == ".txt":
  return cls.TEXT
  elif extension == ".md":
- return cls.MARKDOWN
+ return cls.MARKDOWN
  elif extension == ".pdf":
  return cls.PDF
  elif extension == ".docx":
@@ -61,9 +68,23 @@ class ContextFileType(str, Enum):
  elif extension in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
  return cls.VIDEO
  elif extension in [".png", ".jpg", ".jpeg", ".gif", ".webp"]:
- return cls.IMAGE
+ return cls.IMAGE
  else:
  return cls.UNKNOWN

+ @classmethod
+ def get_readable_text_types(cls) -> list['ContextFileType']:
+ """Returns a list of file types that can be read as plain text for context."""
+ return [
+ cls.TEXT,
+ cls.MARKDOWN,
+ cls.JSON,
+ cls.XML,
+ cls.HTML,
+ cls.PYTHON,
+ cls.JAVASCRIPT,
+ cls.CSV,
+ ]
+
  def __str__(self) -> str:
  return self.value
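from_path now accepts URLs as well as plain paths, and the new get_readable_text_types helper exposes the text-like types in one list. A small sketch using only the extension mappings visible in this diff (the example URLs are illustrative):

    from autobyteus.agent.message.context_file_type import ContextFileType

    assert ContextFileType.from_path("notes/summary.md") is ContextFileType.MARKDOWN
    # Query strings no longer confuse the extension check.
    assert ContextFileType.from_path("https://cdn.example.com/clip.mp4?token=abc") is ContextFileType.VIDEO

    # Types an input processor could read verbatim into the prompt.
    print(ContextFileType.get_readable_text_types())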
autobyteus/agent/message/multimodal_message_builder.py (+47 -0, new file)

@@ -0,0 +1,47 @@
+ # file: autobyteus/autobyteus/agent/message/multimodal_message_builder.py
+ import logging
+
+ from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
+ from autobyteus.agent.message.context_file_type import ContextFileType
+ from autobyteus.llm.user_message import LLMUserMessage
+
+ logger = logging.getLogger(__name__)
+
+ def build_llm_user_message(agent_input_user_message: AgentInputUserMessage) -> LLMUserMessage:
+ """
+ Builds an LLMUserMessage from an AgentInputUserMessage by categorizing its context files.
+
+ This function iterates through the context files, sorting URIs for images, audio, and video
+ into the appropriate fields of the LLMUserMessage. It ignores other file types for now.
+
+ Args:
+ agent_input_user_message: The user input message containing content and context files.
+
+ Returns:
+ An LLMUserMessage ready to be sent to the LLM.
+ """
+ image_urls = []
+ audio_urls = []
+ video_urls = []
+
+ if agent_input_user_message.context_files:
+ for context_file in agent_input_user_message.context_files:
+ file_type = context_file.file_type
+ if file_type == ContextFileType.IMAGE:
+ image_urls.append(context_file.uri)
+ elif file_type == ContextFileType.AUDIO:
+ audio_urls.append(context_file.uri)
+ elif file_type == ContextFileType.VIDEO:
+ video_urls.append(context_file.uri)
+ else:
+ logger.debug(f"Ignoring non-media context file of type '{file_type.value}' during LLM message build: {context_file.uri}")
+
+ llm_user_message = LLMUserMessage(
+ content=agent_input_user_message.content,
+ image_urls=image_urls if image_urls else None,
+ audio_urls=audio_urls if audio_urls else None,
+ video_urls=video_urls if video_urls else None
+ )
+
+ logger.info(f"Built LLMUserMessage with {len(image_urls)} images, {len(audio_urls)} audio, {len(video_urls)} video files.")
+ return llm_user_message
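A usage sketch for the new builder, relying on the type inference shown earlier in this diff. The .mp3-to-AUDIO mapping is an assumption, since the audio extension list is not part of the hunks shown here; the other mappings are visible above.

    from autobyteus.agent.message import AgentInputUserMessage, ContextFile, build_llm_user_message

    msg = AgentInputUserMessage(
        content="Describe the attached screenshot and summarize the report.",
        context_files=[
            ContextFile(uri="/tmp/screenshot.png"),           # inferred as IMAGE
            ContextFile(uri="https://example.com/memo.mp3"),  # assumed to infer as AUDIO
            ContextFile(uri="/tmp/report.pdf"),               # non-media: logged and skipped
        ],
    )

    llm_msg = build_llm_user_message(msg)
    # llm_msg.image_urls == ["/tmp/screenshot.png"]
    # llm_msg.audio_urls == ["https://example.com/memo.mp3"]
    # llm_msg.video_urls is None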
autobyteus/agent/streaming/stream_event_payloads.py (+23 -4)

@@ -20,12 +20,18 @@ class AssistantChunkData(BaseStreamPayload):
  reasoning: Optional[str] = None
  is_complete: bool
  usage: Optional[TokenUsage] = None
+ image_urls: Optional[List[str]] = None
+ audio_urls: Optional[List[str]] = None
+ video_urls: Optional[List[str]] = None


  class AssistantCompleteResponseData(BaseStreamPayload):
  content: str
  reasoning: Optional[str] = None
  usage: Optional[TokenUsage] = None
+ image_urls: Optional[List[str]] = None
+ audio_urls: Optional[List[str]] = None
+ video_urls: Optional[List[str]] = None

  class ToolInteractionLogEntryData(BaseStreamPayload):
  log_entry: str
@@ -102,14 +108,20 @@ def create_assistant_chunk_data(chunk_obj: Any) -> AssistantChunkData:
  content=str(getattr(chunk_obj, 'content', '')),
  reasoning=getattr(chunk_obj, 'reasoning', None),
  is_complete=bool(getattr(chunk_obj, 'is_complete', False)),
- usage=parsed_usage
+ usage=parsed_usage,
+ image_urls=getattr(chunk_obj, 'image_urls', None),
+ audio_urls=getattr(chunk_obj, 'audio_urls', None),
+ video_urls=getattr(chunk_obj, 'video_urls', None)
  )
  elif isinstance(chunk_obj, dict):
  return AssistantChunkData(
  content=str(chunk_obj.get('content', '')),
  reasoning=chunk_obj.get('reasoning', None),
  is_complete=bool(chunk_obj.get('is_complete', False)),
- usage=parsed_usage
+ usage=parsed_usage,
+ image_urls=chunk_obj.get('image_urls', None),
+ audio_urls=chunk_obj.get('audio_urls', None),
+ video_urls=chunk_obj.get('video_urls', None)
  )
  raise ValueError(f"Cannot create AssistantChunkData from {type(chunk_obj)}")

@@ -136,13 +148,19 @@ def create_assistant_complete_response_data(complete_resp_obj: Any) -> AssistantCompleteResponseData:
  return AssistantCompleteResponseData(
  content=str(getattr(complete_resp_obj, 'content', '')),
  reasoning=getattr(complete_resp_obj, 'reasoning', None),
- usage=parsed_usage
+ usage=parsed_usage,
+ image_urls=getattr(complete_resp_obj, 'image_urls', None),
+ audio_urls=getattr(complete_resp_obj, 'audio_urls', None),
+ video_urls=getattr(complete_resp_obj, 'video_urls', None)
  )
  elif isinstance(complete_resp_obj, dict):
  return AssistantCompleteResponseData(
  content=str(complete_resp_obj.get('content', '')),
  reasoning=complete_resp_obj.get('reasoning', None),
- usage=parsed_usage
+ usage=parsed_usage,
+ image_urls=complete_resp_obj.get('image_urls', None),
+ audio_urls=complete_resp_obj.get('audio_urls', None),
+ video_urls=complete_resp_obj.get('video_urls', None)
  )
  raise ValueError(f"Cannot create AssistantCompleteResponseData from {type(complete_resp_obj)}")

@@ -177,3 +195,4 @@ def create_system_task_notification_data(notification_data_dict: Any) -> SystemTaskNotificationData:
  if isinstance(notification_data_dict, dict):
  return SystemTaskNotificationData(**notification_data_dict)
  raise ValueError(f"Cannot create SystemTaskNotificationData from {type(notification_data_dict)}")
+
autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py (+6 -2)

@@ -47,6 +47,9 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
  llm_provider = None
  if context.llm_instance and context.llm_instance.model:
  llm_provider = context.llm_instance.model.provider
+
+ # Retrieve the override flag from the agent's configuration.
+ use_xml_tool_format = context.config.use_xml_tool_format

  # Generate the manifest string for the 'tools' variable.
  tools_manifest: str
@@ -59,10 +62,11 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
  ]

  try:
- # Delegate manifest generation to the provider, which now handles all format logic.
+ # Delegate manifest generation to the provider, passing the override flag.
  tools_manifest = self._manifest_provider.provide(
  tool_definitions=tool_definitions,
- provider=llm_provider
+ provider=llm_provider,
+ use_xml_tool_format=use_xml_tool_format
  )
  except Exception as e:
  logger.exception(f"An unexpected error occurred during tool manifest generation for agent '{agent_id}': {e}")
autobyteus/agent/tool_invocation.py (+2 -1)

@@ -33,7 +33,8 @@ class ToolInvocation:
  """
  # Create a canonical representation of the arguments
  # sort_keys=True ensures that the order of keys doesn't change the hash
- canonical_args = json.dumps(arguments, sort_keys=True, separators=(',', ':'))
+ # ensure_ascii=False is critical for cross-language compatibility with JS
+ canonical_args = json.dumps(arguments, sort_keys=True, separators=(',', ':'), ensure_ascii=False)

  # Create a string to hash
  hash_string = f"{name}:{canonical_args}"