autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/context/agent_runtime_state.py +7 -1
  3. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  4. autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
  5. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  6. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
  7. autobyteus/agent/message/__init__.py +7 -5
  8. autobyteus/agent/message/agent_input_user_message.py +6 -16
  9. autobyteus/agent/message/context_file.py +24 -24
  10. autobyteus/agent/message/context_file_type.py +29 -8
  11. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  12. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  13. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  14. autobyteus/agent/tool_invocation.py +27 -2
  15. autobyteus/agent_team/agent_team_builder.py +22 -1
  16. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  17. autobyteus/agent_team/context/agent_team_config.py +1 -0
  18. autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
  19. autobyteus/llm/api/autobyteus_llm.py +33 -33
  20. autobyteus/llm/api/bedrock_llm.py +13 -5
  21. autobyteus/llm/api/claude_llm.py +13 -27
  22. autobyteus/llm/api/gemini_llm.py +108 -42
  23. autobyteus/llm/api/groq_llm.py +4 -3
  24. autobyteus/llm/api/mistral_llm.py +97 -51
  25. autobyteus/llm/api/nvidia_llm.py +6 -5
  26. autobyteus/llm/api/ollama_llm.py +37 -12
  27. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  28. autobyteus/llm/autobyteus_provider.py +1 -1
  29. autobyteus/llm/base_llm.py +42 -139
  30. autobyteus/llm/extensions/base_extension.py +6 -6
  31. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  32. autobyteus/llm/llm_factory.py +131 -61
  33. autobyteus/llm/ollama_provider_resolver.py +1 -0
  34. autobyteus/llm/providers.py +1 -0
  35. autobyteus/llm/token_counter/token_counter_factory.py +3 -1
  36. autobyteus/llm/user_message.py +43 -35
  37. autobyteus/llm/utils/llm_config.py +34 -18
  38. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  39. autobyteus/llm/utils/messages.py +32 -25
  40. autobyteus/llm/utils/response_types.py +9 -3
  41. autobyteus/llm/utils/token_usage.py +6 -5
  42. autobyteus/multimedia/__init__.py +31 -0
  43. autobyteus/multimedia/audio/__init__.py +11 -0
  44. autobyteus/multimedia/audio/api/__init__.py +4 -0
  45. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  46. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  47. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  48. autobyteus/multimedia/audio/audio_model.py +97 -0
  49. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  50. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  51. autobyteus/multimedia/image/__init__.py +11 -0
  52. autobyteus/multimedia/image/api/__init__.py +9 -0
  53. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  54. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  55. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  56. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  57. autobyteus/multimedia/image/base_image_client.py +67 -0
  58. autobyteus/multimedia/image/image_client_factory.py +118 -0
  59. autobyteus/multimedia/image/image_model.py +97 -0
  60. autobyteus/multimedia/providers.py +5 -0
  61. autobyteus/multimedia/runtimes.py +8 -0
  62. autobyteus/multimedia/utils/__init__.py +10 -0
  63. autobyteus/multimedia/utils/api_utils.py +19 -0
  64. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  65. autobyteus/multimedia/utils/response_types.py +13 -0
  66. autobyteus/task_management/tools/publish_task_plan.py +4 -16
  67. autobyteus/task_management/tools/update_task_status.py +4 -19
  68. autobyteus/tools/__init__.py +5 -4
  69. autobyteus/tools/base_tool.py +98 -29
  70. autobyteus/tools/browser/standalone/__init__.py +0 -1
  71. autobyteus/tools/google_search.py +149 -0
  72. autobyteus/tools/mcp/schema_mapper.py +29 -71
  73. autobyteus/tools/multimedia/__init__.py +8 -0
  74. autobyteus/tools/multimedia/audio_tools.py +116 -0
  75. autobyteus/tools/multimedia/image_tools.py +186 -0
  76. autobyteus/tools/parameter_schema.py +82 -89
  77. autobyteus/tools/pydantic_schema_converter.py +81 -0
  78. autobyteus/tools/tool_category.py +1 -0
  79. autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
  80. autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
  81. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
  82. autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
  83. autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
  84. autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
  85. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
  86. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  87. autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
  88. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  89. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  90. autobyteus-1.1.7.dist-info/METADATA +204 -0
  91. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
  92. examples/run_browser_agent.py +1 -1
  93. examples/run_google_slides_agent.py +2 -2
  94. examples/run_mcp_google_slides_client.py +1 -1
  95. examples/run_sqlite_agent.py +1 -1
  96. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  97. autobyteus/tools/ask_user_input.py +0 -40
  98. autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
  99. autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
  100. autobyteus-1.1.5.dist-info/METADATA +0 -161
  101. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
  102. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
  103. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ class AgentConfig:
37
37
  system_prompt: Optional[str] = None,
38
38
  tools: Optional[List['BaseTool']] = None,
39
39
  auto_execute_tools: bool = True,
40
+ use_xml_tool_format: bool = False,
40
41
  input_processors: Optional[List['BaseAgentUserInputMessageProcessor']] = None,
41
42
  llm_response_processors: Optional[List['BaseLLMResponseProcessor']] = None,
42
43
  system_prompt_processors: Optional[List['BaseSystemPromptProcessor']] = None,
@@ -57,6 +58,8 @@ class AgentConfig:
57
58
  llm_instance's config will be used as the base.
58
59
  tools: An optional list of pre-initialized tool instances (subclasses of BaseTool).
59
60
  auto_execute_tools: If True, the agent will execute tools without approval.
61
+ use_xml_tool_format: If True, forces the agent to use XML format for tool
62
+ definitions and parsing, overriding provider defaults.
60
63
  input_processors: A list of input processor instances.
61
64
  llm_response_processors: A list of LLM response processor instances.
62
65
  system_prompt_processors: A list of system prompt processor instances.
@@ -74,6 +77,7 @@ class AgentConfig:
74
77
  self.tools = tools or []
75
78
  self.workspace = workspace
76
79
  self.auto_execute_tools = auto_execute_tools
80
+ self.use_xml_tool_format = use_xml_tool_format
77
81
  self.input_processors = input_processors or []
78
82
  self.llm_response_processors = llm_response_processors if llm_response_processors is not None else list(self.DEFAULT_LLM_RESPONSE_PROCESSORS)
79
83
  self.system_prompt_processors = system_prompt_processors if system_prompt_processors is not None else list(self.DEFAULT_SYSTEM_PROMPT_PROCESSORS)
@@ -81,7 +85,7 @@ class AgentConfig:
81
85
  self.phase_hooks = phase_hooks or []
82
86
  self.initial_custom_data = initial_custom_data
83
87
 
84
- logger.debug(f"AgentConfig created for name '{self.name}', role '{self.role}'.")
88
+ logger.debug(f"AgentConfig created for name '{self.name}', role '{self.role}'. XML tool format override: {self.use_xml_tool_format}")
85
89
 
86
90
  def copy(self) -> 'AgentConfig':
87
91
  """
@@ -98,6 +102,7 @@ class AgentConfig:
98
102
  system_prompt=self.system_prompt,
99
103
  tools=self.tools.copy(), # Shallow copy the list, but reference the original tool instances
100
104
  auto_execute_tools=self.auto_execute_tools,
105
+ use_xml_tool_format=self.use_xml_tool_format,
101
106
  input_processors=self.input_processors.copy(), # Shallow copy the list
102
107
  llm_response_processors=self.llm_response_processors.copy(), # Shallow copy the list
103
108
  system_prompt_processors=self.system_prompt_processors.copy(), # Shallow copy the list
@@ -16,6 +16,7 @@ from autobyteus.agent.tool_invocation import ToolInvocation
16
16
  if TYPE_CHECKING:
17
17
  from autobyteus.agent.phases import AgentPhaseManager
18
18
  from autobyteus.tools.base_tool import BaseTool
19
+ from autobyteus.agent.tool_invocation import ToolInvocationTurn
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
@@ -48,6 +49,9 @@ class AgentRuntimeState:
48
49
  self.pending_tool_approvals: Dict[str, ToolInvocation] = {}
49
50
  self.custom_data: Dict[str, Any] = custom_data or {}
50
51
 
52
+ # NEW: State for multi-tool call invocation turns, with a very explicit name.
53
+ self.active_multi_tool_call_turn: Optional['ToolInvocationTurn'] = None
54
+
51
55
  self.processed_system_prompt: Optional[str] = None
52
56
  # self.final_llm_config_for_creation removed
53
57
 
@@ -83,7 +87,9 @@ class AgentRuntimeState:
83
87
  tools_status = f"{len(self.tool_instances)} Initialized" if self.tool_instances is not None else "Not Initialized"
84
88
  input_queues_status = "Initialized" if self.input_event_queues else "Not Initialized"
85
89
  # REMOVED output_queues_status from repr
90
+ active_turn_status = "Active" if self.active_multi_tool_call_turn else "Inactive"
86
91
  return (f"AgentRuntimeState(agent_id='{self.agent_id}', current_phase='{phase_repr}', "
87
92
  f"llm_status='{llm_status}', tools_status='{tools_status}', "
88
93
  f"input_queues_status='{input_queues_status}', "
89
- f"pending_approvals={len(self.pending_tool_approvals)}, history_len={len(self.conversation_history)})")
94
+ f"pending_approvals={len(self.pending_tool_approvals)}, history_len={len(self.conversation_history)}, "
95
+ f"multi_tool_call_turn='{active_turn_status}')")
@@ -1,7 +1,7 @@
1
1
  # file: autobyteus/autobyteus/agent/handlers/llm_user_message_ready_event_handler.py
2
2
  import logging
3
3
  import traceback
4
- from typing import TYPE_CHECKING, cast, Optional
4
+ from typing import TYPE_CHECKING, cast, Optional, List
5
5
 
6
6
  from autobyteus.agent.handlers.base_event_handler import AgentEventHandler
7
7
  from autobyteus.agent.events import LLMUserMessageReadyEvent, LLMCompleteResponseReceivedEvent
@@ -53,6 +53,9 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
53
53
  complete_response_text = ""
54
54
  complete_reasoning_text = ""
55
55
  token_usage: Optional[TokenUsage] = None
56
+ complete_image_urls: List[str] = []
57
+ complete_audio_urls: List[str] = []
58
+ complete_video_urls: List[str] = []
56
59
 
57
60
  notifier: Optional['AgentExternalEventNotifier'] = None
58
61
  if context.phase_manager:
@@ -72,9 +75,19 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
72
75
  if chunk_response.reasoning:
73
76
  complete_reasoning_text += chunk_response.reasoning
74
77
 
75
- if chunk_response.is_complete and chunk_response.usage:
76
- token_usage = chunk_response.usage
77
- logger.debug(f"Agent '{agent_id}' received final chunk with token usage: {token_usage}")
78
+ if chunk_response.is_complete:
79
+ if chunk_response.usage:
80
+ token_usage = chunk_response.usage
81
+ logger.debug(f"Agent '{agent_id}' received final chunk with token usage: {token_usage}")
82
+ if chunk_response.image_urls:
83
+ complete_image_urls.extend(chunk_response.image_urls)
84
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.image_urls)} image URLs.")
85
+ if chunk_response.audio_urls:
86
+ complete_audio_urls.extend(chunk_response.audio_urls)
87
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.audio_urls)} audio URLs.")
88
+ if chunk_response.video_urls:
89
+ complete_video_urls.extend(chunk_response.video_urls)
90
+ logger.debug(f"Agent '{agent_id}' received final chunk with {len(chunk_response.video_urls)} video URLs.")
78
91
 
79
92
  if notifier:
80
93
  try:
@@ -121,20 +134,30 @@ class LLMUserMessageReadyEventHandler(AgentEventHandler):
121
134
  logger.info(f"Agent '{agent_id}' enqueued LLMCompleteResponseReceivedEvent with error details from LLMUserMessageReadyEventHandler.")
122
135
  return
123
136
 
124
- # Add message to history with reasoning
137
+ # Add message to history with reasoning and multimodal data
125
138
  history_entry = {"role": "assistant", "content": complete_response_text}
126
139
  if complete_reasoning_text:
127
140
  history_entry["reasoning"] = complete_reasoning_text
141
+ if complete_image_urls:
142
+ history_entry["image_urls"] = complete_image_urls
143
+ if complete_audio_urls:
144
+ history_entry["audio_urls"] = complete_audio_urls
145
+ if complete_video_urls:
146
+ history_entry["video_urls"] = complete_video_urls
128
147
  context.state.add_message_to_history(history_entry)
129
148
 
130
- # Create complete response with reasoning
149
+ # Create complete response with reasoning and multimodal data
131
150
  complete_response_obj = CompleteResponse(
132
151
  content=complete_response_text,
133
152
  reasoning=complete_reasoning_text,
134
- usage=token_usage
153
+ usage=token_usage,
154
+ image_urls=complete_image_urls,
155
+ audio_urls=complete_audio_urls,
156
+ video_urls=complete_video_urls
135
157
  )
136
158
  llm_complete_event = LLMCompleteResponseReceivedEvent(
137
159
  complete_response=complete_response_obj
138
160
  )
139
161
  await context.input_event_queues.enqueue_internal_system_event(llm_complete_event)
140
162
  logger.info(f"Agent '{agent_id}' enqueued LLMCompleteResponseReceivedEvent from LLMUserMessageReadyEventHandler.")
163
+
@@ -1,7 +1,7 @@
1
1
  # file: autobyteus/autobyteus/agent/handlers/tool_result_event_handler.py
2
2
  import logging
3
3
  import json
4
- from typing import TYPE_CHECKING, Optional
4
+ from typing import TYPE_CHECKING, Optional, List
5
5
 
6
6
  from autobyteus.agent.handlers.base_event_handler import AgentEventHandler
7
7
  from autobyteus.agent.events import ToolResultEvent, LLMUserMessageReadyEvent
@@ -16,13 +16,60 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  class ToolResultEventHandler(AgentEventHandler):
18
18
  """
19
- Handles ToolResultEvents by formatting the tool's output (or error)
20
- as a new LLMUserMessage, emitting AGENT_DATA_TOOL_LOG event for this outcome,
21
- and enqueuing an LLMUserMessageReadyEvent for further LLM processing.
19
+ Handles ToolResultEvents. It immediately processes and notifies for each
20
+ individual tool result. If a multi-tool call turn is active, it accumulates
21
+ these processed results until the turn is complete, then sends a single
22
+ aggregated message to the LLM.
22
23
  """
23
24
  def __init__(self):
24
25
  logger.info("ToolResultEventHandler initialized.")
25
26
 
27
+ async def _dispatch_aggregated_results_to_llm(self,
28
+ processed_events: List[ToolResultEvent],
29
+ context: 'AgentContext'):
30
+ """
31
+ Aggregates a list of PRE-PROCESSED tool results into a single message and
32
+ dispatches it to the LLM.
33
+ """
34
+ agent_id = context.agent_id
35
+
36
+ # --- Aggregate results into a single message ---
37
+ aggregated_content_parts = []
38
+ for p_event in processed_events:
39
+ tool_invocation_id = p_event.tool_invocation_id if p_event.tool_invocation_id else 'N/A'
40
+ content_part: str
41
+ if p_event.error:
42
+ content_part = (
43
+ f"Tool: {p_event.tool_name} (ID: {tool_invocation_id})\n"
44
+ f"Status: Error\n"
45
+ f"Details: {p_event.error}"
46
+ )
47
+ else:
48
+ try:
49
+ result_str = json.dumps(p_event.result, indent=2) if not isinstance(p_event.result, str) else p_event.result
50
+ except TypeError: # pragma: no cover
51
+ result_str = str(p_event.result)
52
+ content_part = (
53
+ f"Tool: {p_event.tool_name} (ID: {tool_invocation_id})\n"
54
+ f"Status: Success\n"
55
+ f"Result:\n{result_str}"
56
+ )
57
+ aggregated_content_parts.append(content_part)
58
+
59
+ final_content_for_llm = (
60
+ "The following tool executions have completed. Please analyze their results and decide the next course of action.\n\n"
61
+ + "\n\n---\n\n".join(aggregated_content_parts)
62
+ )
63
+
64
+ logger.debug(f"Agent '{agent_id}' preparing aggregated message for LLM:\n---\n{final_content_for_llm}\n---")
65
+ llm_user_message = LLMUserMessage(content=final_content_for_llm)
66
+
67
+ next_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
68
+ await context.input_event_queues.enqueue_internal_system_event(next_event)
69
+
70
+ logger.info(f"Agent '{agent_id}' enqueued LLMUserMessageReadyEvent with aggregated results from {len(processed_events)} tool(s).")
71
+
72
+
26
73
  async def handle(self,
27
74
  event: ToolResultEvent,
28
75
  context: 'AgentContext') -> None:
@@ -31,99 +78,64 @@ class ToolResultEventHandler(AgentEventHandler):
31
78
  return
32
79
 
33
80
  agent_id = context.agent_id
34
- processed_event = event
81
+ notifier: Optional['AgentExternalEventNotifier'] = context.phase_manager.notifier if context.phase_manager else None
35
82
 
36
- # --- New: Apply Tool Execution Result Processors ---
83
+ # --- Step 1: Immediately process the incoming event ---
84
+ processed_event = event
37
85
  processor_instances = context.config.tool_execution_result_processors
38
86
  if processor_instances:
39
- processor_names = [p.get_name() for p in processor_instances]
40
- logger.debug(f"Agent '{agent_id}': Applying tool execution result processors: {processor_names}")
41
87
  for processor_instance in processor_instances:
42
- processor_name_for_log = "unknown"
88
+ if not isinstance(processor_instance, BaseToolExecutionResultProcessor):
89
+ logger.error(f"Agent '{agent_id}': Invalid tool result processor type: {type(processor_instance)}. Skipping.")
90
+ continue
43
91
  try:
44
- if not isinstance(processor_instance, BaseToolExecutionResultProcessor):
45
- logger.error(f"Agent '{agent_id}': Invalid tool result processor type: {type(processor_instance)}. Skipping.")
46
- continue
47
-
48
- processor_name_for_log = processor_instance.get_name()
49
- logger.debug(f"Agent '{agent_id}': Applying tool result processor '{processor_name_for_log}'.")
50
-
51
- event_before_proc = processed_event
52
- processed_event = await processor_instance.process(event_before_proc, context)
53
- logger.info(f"Agent '{agent_id}': Tool result processor '{processor_name_for_log}' applied successfully.")
54
-
92
+ processed_event = await processor_instance.process(processed_event, context)
55
93
  except Exception as e:
56
- logger.error(f"Agent '{agent_id}': Error applying tool result processor '{processor_name_for_log}': {e}. "
57
- f"Skipping and continuing with result from before this processor.", exc_info=True)
58
- processed_event = event_before_proc
59
- # --- End New ---
60
-
61
- tool_invocation_id = processed_event.tool_invocation_id if processed_event.tool_invocation_id else 'N/A'
62
-
63
- logger.info(f"Agent '{agent_id}' handling processed ToolResultEvent from tool: '{processed_event.tool_name}' (Invocation ID: {tool_invocation_id}). Error: {processed_event.error is not None}")
94
+ logger.error(f"Agent '{agent_id}': Error applying tool result processor '{processor_instance.get_name()}': {e}", exc_info=True)
64
95
 
65
- notifier: Optional['AgentExternalEventNotifier'] = None
66
- if context.phase_manager:
67
- notifier = context.phase_manager.notifier
68
-
69
- if not notifier: # pragma: no cover
70
- logger.error(f"Agent '{agent_id}': Notifier not available in ToolResultEventHandler. Tool result processing logs will not be emitted.")
71
-
72
- if processed_event.error:
73
- logger.debug(f"Agent '{agent_id}' tool '{processed_event.tool_name}' (ID: {tool_invocation_id}) raw error details: {processed_event.error}")
74
- else:
96
+ # --- Step 2: Immediately notify the result of this single tool call ---
97
+ tool_invocation_id = processed_event.tool_invocation_id if processed_event.tool_invocation_id else 'N/A'
98
+ if notifier:
99
+ log_message = ""
100
+ if processed_event.error:
101
+ log_message = f"[TOOL_RESULT_ERROR_PROCESSED] Agent_ID: {agent_id}, Tool: {processed_event.tool_name}, Invocation_ID: {tool_invocation_id}, Error: {processed_event.error}"
102
+ else:
103
+ log_message = f"[TOOL_RESULT_SUCCESS_PROCESSED] Agent_ID: {agent_id}, Tool: {processed_event.tool_name}, Invocation_ID: {tool_invocation_id}, Result: {str(processed_event.result)}"
104
+
75
105
  try:
76
- raw_result_str_for_debug_log = json.dumps(processed_event.result, indent=2)
77
- except TypeError: # pragma: no cover
78
- raw_result_str_for_debug_log = str(processed_event.result)
79
- logger.debug(f"Agent '{agent_id}' tool '{processed_event.tool_name}' (ID: {tool_invocation_id}) raw result:\n---\n{raw_result_str_for_debug_log}\n---")
106
+ log_data = {
107
+ "log_entry": log_message,
108
+ "tool_invocation_id": tool_invocation_id,
109
+ "tool_name": processed_event.tool_name,
110
+ }
111
+ notifier.notify_agent_data_tool_log(log_data)
112
+ logger.debug(f"Agent '{agent_id}': Notified individual tool result for '{processed_event.tool_name}'.")
113
+ except Exception as e_notify:
114
+ logger.error(f"Agent '{agent_id}': Error notifying tool result log: {e_notify}", exc_info=True)
80
115
 
116
+ # --- Step 3: Manage the multi-tool call turn state ---
117
+ active_turn = context.state.active_multi_tool_call_turn
81
118
 
82
- content_for_llm: str
83
- if processed_event.error:
84
- content_for_llm = (
85
- f"The tool '{processed_event.tool_name}' (invocation ID: {tool_invocation_id}) encountered an error.\n"
86
- f"Error details: {processed_event.error}\n"
87
- f"Please analyze this error and decide the next course of action."
88
- )
89
- log_msg_error_processed = f"[TOOL_RESULT_ERROR_PROCESSED] Agent_ID: {agent_id}, Tool: {processed_event.tool_name}, Invocation_ID: {tool_invocation_id}, Error: {processed_event.error}"
90
- if notifier:
91
- try:
92
- log_data = {
93
- "log_entry": log_msg_error_processed,
94
- "tool_invocation_id": tool_invocation_id,
95
- "tool_name": processed_event.tool_name,
96
- }
97
- notifier.notify_agent_data_tool_log(log_data)
98
- except Exception as e_notify:
99
- logger.error(f"Agent '{agent_id}': Error notifying tool result error log: {e_notify}", exc_info=True)
100
- else:
101
- try:
102
- result_str_for_llm = json.dumps(processed_event.result, indent=2) if not isinstance(processed_event.result, str) else processed_event.result
103
- except TypeError: # pragma: no cover
104
- result_str_for_llm = str(processed_event.result)
119
+ # Case 1: Not a multi-tool call turn, dispatch to LLM immediately.
120
+ if not active_turn:
121
+ logger.info(f"Agent '{agent_id}' handling single ToolResultEvent from tool: '{processed_event.tool_name}'.")
122
+ await self._dispatch_aggregated_results_to_llm([processed_event], context)
123
+ return
105
124
 
106
- content_for_llm = (
107
- f"The tool '{processed_event.tool_name}' (invocation ID: {tool_invocation_id}) has executed.\n"
108
- f"Result:\n{result_str_for_llm}\n"
109
- f"Based on this result, what is the next step or final answer?"
110
- )
111
- log_msg_success_processed = f"[TOOL_RESULT_SUCCESS_PROCESSED] Agent_ID: {agent_id}, Tool: {processed_event.tool_name}, Invocation_ID: {tool_invocation_id}, Result: {str(processed_event.result)}"
112
- if notifier:
113
- try:
114
- log_data = {
115
- "log_entry": log_msg_success_processed,
116
- "tool_invocation_id": tool_invocation_id,
117
- "tool_name": processed_event.tool_name,
118
- }
119
- notifier.notify_agent_data_tool_log(log_data)
120
- except Exception as e_notify:
121
- logger.error(f"Agent '{agent_id}': Error notifying tool result success log: {e_notify}", exc_info=True)
122
-
123
- logger.debug(f"Agent '{agent_id}' preparing message for LLM based on tool '{processed_event.tool_name}' (ID: {tool_invocation_id}) result:\n---\n{content_for_llm}\n---")
124
- llm_user_message = LLMUserMessage(content=content_for_llm)
125
-
126
- next_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
127
- await context.input_event_queues.enqueue_internal_system_event(next_event)
125
+ # Case 2: Multi-tool call turn is active, accumulate results.
126
+ active_turn.results.append(processed_event)
127
+ num_results = len(active_turn.results)
128
+ num_expected = len(active_turn.invocations)
129
+ logger.info(f"Agent '{agent_id}' handling ToolResultEvent for multi-tool call turn. "
130
+ f"Collected {num_results}/{num_expected} results.")
131
+
132
+ # If not all results are in, just wait for the next ToolResultEvent.
133
+ if not active_turn.is_complete():
134
+ return
135
+
136
+ # If all results are in, dispatch them to the LLM and clean up the turn state.
137
+ logger.info(f"Agent '{agent_id}': All tool results for the turn collected. Aggregating for LLM.")
138
+ await self._dispatch_aggregated_results_to_llm(active_turn.results, context)
128
139
 
129
- logger.info(f"Agent '{agent_id}' enqueued LLMUserMessageReadyEvent for LLM based on tool '{processed_event.tool_name}' (ID: {tool_invocation_id}) result summary.")
140
+ context.state.active_multi_tool_call_turn = None
141
+ logger.info(f"Agent '{agent_id}': Multi-tool call turn state has been cleared.")
@@ -3,14 +3,14 @@ import logging
3
3
  from typing import TYPE_CHECKING
4
4
 
5
5
  from autobyteus.agent.handlers.base_event_handler import AgentEventHandler
6
- from autobyteus.agent.events import UserMessageReceivedEvent, LLMUserMessageReadyEvent
7
- from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
6
+ from autobyteus.agent.events import UserMessageReceivedEvent, LLMUserMessageReadyEvent
7
+ from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
8
8
  from autobyteus.agent.input_processor import BaseAgentUserInputMessageProcessor
9
- from autobyteus.llm.user_message import LLMUserMessage
9
+ from autobyteus.agent.message.multimodal_message_builder import build_llm_user_message
10
10
 
11
11
 
12
12
  if TYPE_CHECKING:
13
- from autobyteus.agent.context import AgentContext
13
+ from autobyteus.agent.context import AgentContext
14
14
  from autobyteus.agent.events.notifiers import AgentExternalEventNotifier
15
15
 
16
16
  logger = logging.getLogger(__name__)
@@ -18,24 +18,23 @@ logger = logging.getLogger(__name__)
18
18
  class UserInputMessageEventHandler(AgentEventHandler):
19
19
  """
20
20
  Handles UserMessageReceivedEvents by first applying any configured
21
- AgentUserInputMessageProcessors (provided as instances) to the AgentInputUserMessage,
22
- then converting the processed message into an LLMUserMessage, and finally
21
+ AgentUserInputMessageProcessors, then using the multimodal_message_builder
22
+ to convert the processed message into an LLMUserMessage, and finally
23
23
  enqueuing an LLMUserMessageReadyEvent for further processing by the LLM.
24
- It also checks for metadata to emit special notifications for system-generated tasks.
25
24
  """
26
25
 
27
26
  def __init__(self):
28
27
  logger.info("UserInputMessageEventHandler initialized.")
29
28
 
30
29
  async def handle(self,
31
- event: UserMessageReceivedEvent,
30
+ event: UserMessageReceivedEvent,
32
31
  context: 'AgentContext') -> None:
33
- if not isinstance(event, UserMessageReceivedEvent):
32
+ if not isinstance(event, UserMessageReceivedEvent):
34
33
  logger.warning(f"UserInputMessageEventHandler received non-UserMessageReceivedEvent: {type(event)}. Skipping.")
35
34
  return
36
35
 
37
- original_agent_input_user_msg: AgentInputUserMessage = event.agent_input_user_message
38
-
36
+ original_agent_input_user_msg: AgentInputUserMessage = event.agent_input_user_message
37
+
39
38
  # --- NEW LOGIC: Check metadata for system-generated tasks and notify TUI ---
40
39
  if original_agent_input_user_msg.metadata.get('source') == 'system_task_notifier':
41
40
  if context.phase_manager:
@@ -47,11 +46,11 @@ class UserInputMessageEventHandler(AgentEventHandler):
47
46
  notifier.notify_agent_data_system_task_notification_received(notification_data)
48
47
  logger.info(f"Agent '{context.agent_id}' emitted system task notification for TUI.")
49
48
  # --- END NEW LOGIC ---
50
-
51
- processed_agent_input_user_msg: AgentInputUserMessage = original_agent_input_user_msg
52
-
53
- logger.info(f"Agent '{context.agent_id}' handling UserMessageReceivedEvent: '{original_agent_input_user_msg.content}'")
54
-
49
+
50
+ processed_agent_input_user_msg: AgentInputUserMessage = original_agent_input_user_msg
51
+
52
+ logger.info(f"Agent '{context.agent_id}' handling UserMessageReceivedEvent: '{original_agent_input_user_msg.content}'")
53
+
55
54
  processor_instances = context.config.input_processors
56
55
  if processor_instances:
57
56
  processor_names = [p.get_name() for p in processor_instances]
@@ -62,14 +61,14 @@ class UserInputMessageEventHandler(AgentEventHandler):
62
61
  if not isinstance(processor_instance, BaseAgentUserInputMessageProcessor):
63
62
  logger.error(f"Agent '{context.agent_id}': Invalid input processor type in config: {type(processor_instance)}. Skipping.")
64
63
  continue
65
-
64
+
66
65
  processor_name_for_log = processor_instance.get_name()
67
66
  logger.debug(f"Agent '{context.agent_id}': Applying input processor '{processor_name_for_log}'.")
68
67
  msg_before_this_processor = processed_agent_input_user_msg
69
68
  # Pass the original event to the processor
70
69
  processed_agent_input_user_msg = await processor_instance.process(
71
- message=msg_before_this_processor,
72
- context=context,
70
+ message=msg_before_this_processor,
71
+ context=context,
73
72
  triggering_event=event
74
73
  )
75
74
  logger.info(f"Agent '{context.agent_id}': Input processor '{processor_name_for_log}' applied successfully.")
@@ -81,12 +80,10 @@ class UserInputMessageEventHandler(AgentEventHandler):
81
80
  else:
82
81
  logger.debug(f"Agent '{context.agent_id}': No input processors configured in agent config.")
83
82
 
84
- llm_user_message = LLMUserMessage(
85
- content=processed_agent_input_user_msg.content,
86
- image_urls=processed_agent_input_user_msg.image_urls
87
- )
83
+ # --- Refactored: Use the dedicated builder ---
84
+ llm_user_message = build_llm_user_message(processed_agent_input_user_msg)
88
85
 
89
- llm_user_message_ready_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
86
+ llm_user_message_ready_event = LLMUserMessageReadyEvent(llm_user_message=llm_user_message)
90
87
  await context.input_event_queues.enqueue_internal_system_event(llm_user_message_ready_event)
91
-
88
+
92
89
  logger.info(f"Agent '{context.agent_id}' processed AgentInputUserMessage and enqueued LLMUserMessageReadyEvent.")
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, List
4
4
 
5
5
  from .base_processor import BaseLLMResponseProcessor
6
6
  from autobyteus.agent.events import PendingToolInvocationEvent
7
- from autobyteus.agent.tool_invocation import ToolInvocation
7
+ from autobyteus.agent.tool_invocation import ToolInvocation, ToolInvocationTurn
8
8
  from autobyteus.tools.usage.parsers import ProviderAwareToolUsageParser
9
9
  from autobyteus.tools.usage.parsers.exceptions import ToolUsageParseException
10
10
 
@@ -72,6 +72,12 @@ class ProviderAwareToolUsageProcessor(BaseLLMResponseProcessor):
72
72
  processed_invocations.append(invocation)
73
73
 
74
74
  # --- END NEW LOGIC ---
75
+
76
+ # --- NEW: Initialize the multi-tool turn state ---
77
+ if len(processed_invocations) > 0:
78
+ logger.info(f"Agent '{context.agent_id}': Initializing multi-tool call turn with {len(processed_invocations)} invocations.")
79
+ context.state.active_multi_tool_call_turn = ToolInvocationTurn(invocations=processed_invocations)
80
+ # --- END NEW ---
75
81
 
76
82
  logger.info(f"Agent '{context.agent_id}': Parsed {len(processed_invocations)} tool invocations. Enqueuing events with unique IDs.")
77
83
  for invocation in processed_invocations:
@@ -9,12 +9,14 @@ from .agent_input_user_message import AgentInputUserMessage
9
9
  from .send_message_to import SendMessageTo
10
10
  from .context_file import ContextFile
11
11
  from .context_file_type import ContextFileType
12
+ from .multimodal_message_builder import build_llm_user_message
12
13
 
13
14
  __all__ = [
14
- "InterAgentMessage",
15
- "InterAgentMessageType",
16
- "AgentInputUserMessage",
15
+ "InterAgentMessage",
16
+ "InterAgentMessageType",
17
+ "AgentInputUserMessage",
17
18
  "SendMessageTo",
18
- "ContextFile",
19
- "ContextFileType",
19
+ "ContextFile",
20
+ "ContextFileType",
21
+ "build_llm_user_message",
20
22
  ]
@@ -8,21 +8,18 @@ from .context_file import ContextFile # Import the new ContextFile dataclass
8
8
  logger = logging.getLogger(__name__)
9
9
 
10
10
  @dataclass
11
- class AgentInputUserMessage:
11
+ class AgentInputUserMessage:
12
12
  """
13
13
  Represents a message received from an external user interacting with the agent system.
14
- This is a simple dataclass. It includes support for a list of ContextFile objects,
15
- allowing users to provide various documents as context.
14
+ This is a simple dataclass. It includes support for a list of ContextFile objects,
15
+ allowing users to provide various documents and media as context via a single list.
16
16
  """
17
17
  content: str
18
- image_urls: Optional[List[str]] = field(default=None) # Basic list of strings
19
18
  context_files: Optional[List[ContextFile]] = field(default=None)
20
19
  metadata: Dict[str, Any] = field(default_factory=dict)
21
20
 
22
21
  def __post_init__(self):
23
22
  # Basic type validation that dataclasses don't do automatically for mutable defaults or complex types
24
- if self.image_urls is not None and not (isinstance(self.image_urls, list) and all(isinstance(url, str) for url in self.image_urls)):
25
- raise TypeError("AgentInputUserMessage 'image_urls' must be a list of strings if provided.")
26
23
  if self.context_files is not None and not (isinstance(self.context_files, list) and all(isinstance(cf, ContextFile) for cf in self.context_files)):
27
24
  raise TypeError("AgentInputUserMessage 'context_files' must be a list of ContextFile objects if provided.")
28
25
  if not isinstance(self.metadata, dict): # Should be caught by default_factory, but good practice
@@ -34,7 +31,7 @@ class AgentInputUserMessage:
34
31
  num_context_files = len(self.context_files) if self.context_files else 0
35
32
  logger.debug(
36
33
  f"AgentInputUserMessage initialized. Content: '{self.content[:50]}...', "
37
- f"Image URLs: {self.image_urls}, Num ContextFiles: {num_context_files}, "
34
+ f"Num ContextFiles: {num_context_files}, "
38
35
  f"Metadata keys: {list(self.metadata.keys())}"
39
36
  )
40
37
 
@@ -47,7 +44,6 @@ class AgentInputUserMessage:
47
44
 
48
45
  return {
49
46
  "content": self.content,
50
- "image_urls": self.image_urls,
51
47
  "context_files": context_files_dict_list,
52
48
  "metadata": self.metadata,
53
49
  }
@@ -59,31 +55,25 @@ class AgentInputUserMessage:
59
55
  if not isinstance(content, str): # Ensure content is string
60
56
  raise ValueError("AgentInputUserMessage 'content' in dictionary must be a string.")
61
57
 
62
- image_urls = data.get("image_urls")
63
- if image_urls is not None and not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
64
- raise ValueError("AgentInputUserMessage 'image_urls' in dictionary must be a list of strings if provided.")
65
-
66
58
  context_files_data = data.get("context_files")
67
59
  context_files_list: Optional[List[ContextFile]] = None
68
60
  if context_files_data is not None:
69
61
  if not isinstance(context_files_data, list):
70
62
  raise ValueError("AgentInputUserMessage 'context_files' in dictionary must be a list if provided.")
71
63
  context_files_list = [ContextFile.from_dict(cf_data) for cf_data in context_files_data]
72
-
64
+
73
65
  metadata = data.get("metadata", {})
74
66
  if not isinstance(metadata, dict):
75
67
  raise ValueError("AgentInputUserMessage 'metadata' in dictionary must be a dict if provided.")
76
68
 
77
69
  return cls(
78
70
  content=content,
79
- image_urls=image_urls,
80
71
  context_files=context_files_list,
81
72
  metadata=metadata
82
73
  )
83
74
 
84
75
  def __repr__(self) -> str:
85
76
  content_preview = f"{self.content[:100]}..." if len(self.content) > 100 else self.content
86
- images_repr = f", image_urls={self.image_urls}" if self.image_urls else ""
87
77
 
88
78
  if self.context_files:
89
79
  context_repr = f", context_files=[{len(self.context_files)} ContextFile(s)]"
@@ -93,4 +83,4 @@ class AgentInputUserMessage:
93
83
  meta_repr = f", metadata_keys={list(self.metadata.keys())}" if self.metadata else ""
94
84
 
95
85
  return (f"AgentInputUserMessage(content='{content_preview}'"
96
- f"{images_repr}{context_repr}{meta_repr})")
86
+ f"{context_repr}{meta_repr})")