autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/context/agent_runtime_state.py +7 -1
  3. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  4. autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
  5. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  6. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
  7. autobyteus/agent/message/__init__.py +7 -5
  8. autobyteus/agent/message/agent_input_user_message.py +6 -16
  9. autobyteus/agent/message/context_file.py +24 -24
  10. autobyteus/agent/message/context_file_type.py +29 -8
  11. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  12. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  13. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  14. autobyteus/agent/tool_invocation.py +27 -2
  15. autobyteus/agent_team/agent_team_builder.py +22 -1
  16. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  17. autobyteus/agent_team/context/agent_team_config.py +1 -0
  18. autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
  19. autobyteus/llm/api/autobyteus_llm.py +33 -33
  20. autobyteus/llm/api/bedrock_llm.py +13 -5
  21. autobyteus/llm/api/claude_llm.py +13 -27
  22. autobyteus/llm/api/gemini_llm.py +108 -42
  23. autobyteus/llm/api/groq_llm.py +4 -3
  24. autobyteus/llm/api/mistral_llm.py +97 -51
  25. autobyteus/llm/api/nvidia_llm.py +6 -5
  26. autobyteus/llm/api/ollama_llm.py +37 -12
  27. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  28. autobyteus/llm/autobyteus_provider.py +1 -1
  29. autobyteus/llm/base_llm.py +42 -139
  30. autobyteus/llm/extensions/base_extension.py +6 -6
  31. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  32. autobyteus/llm/llm_factory.py +131 -61
  33. autobyteus/llm/ollama_provider_resolver.py +1 -0
  34. autobyteus/llm/providers.py +1 -0
  35. autobyteus/llm/token_counter/token_counter_factory.py +3 -1
  36. autobyteus/llm/user_message.py +43 -35
  37. autobyteus/llm/utils/llm_config.py +34 -18
  38. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  39. autobyteus/llm/utils/messages.py +32 -25
  40. autobyteus/llm/utils/response_types.py +9 -3
  41. autobyteus/llm/utils/token_usage.py +6 -5
  42. autobyteus/multimedia/__init__.py +31 -0
  43. autobyteus/multimedia/audio/__init__.py +11 -0
  44. autobyteus/multimedia/audio/api/__init__.py +4 -0
  45. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  46. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  47. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  48. autobyteus/multimedia/audio/audio_model.py +97 -0
  49. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  50. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  51. autobyteus/multimedia/image/__init__.py +11 -0
  52. autobyteus/multimedia/image/api/__init__.py +9 -0
  53. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  54. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  55. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  56. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  57. autobyteus/multimedia/image/base_image_client.py +67 -0
  58. autobyteus/multimedia/image/image_client_factory.py +118 -0
  59. autobyteus/multimedia/image/image_model.py +97 -0
  60. autobyteus/multimedia/providers.py +5 -0
  61. autobyteus/multimedia/runtimes.py +8 -0
  62. autobyteus/multimedia/utils/__init__.py +10 -0
  63. autobyteus/multimedia/utils/api_utils.py +19 -0
  64. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  65. autobyteus/multimedia/utils/response_types.py +13 -0
  66. autobyteus/task_management/tools/publish_task_plan.py +4 -16
  67. autobyteus/task_management/tools/update_task_status.py +4 -19
  68. autobyteus/tools/__init__.py +5 -4
  69. autobyteus/tools/base_tool.py +98 -29
  70. autobyteus/tools/browser/standalone/__init__.py +0 -1
  71. autobyteus/tools/google_search.py +149 -0
  72. autobyteus/tools/mcp/schema_mapper.py +29 -71
  73. autobyteus/tools/multimedia/__init__.py +8 -0
  74. autobyteus/tools/multimedia/audio_tools.py +116 -0
  75. autobyteus/tools/multimedia/image_tools.py +186 -0
  76. autobyteus/tools/parameter_schema.py +82 -89
  77. autobyteus/tools/pydantic_schema_converter.py +81 -0
  78. autobyteus/tools/tool_category.py +1 -0
  79. autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
  80. autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
  81. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
  82. autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
  83. autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
  84. autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
  85. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
  86. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  87. autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
  88. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  89. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  90. autobyteus-1.1.7.dist-info/METADATA +204 -0
  91. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
  92. examples/run_browser_agent.py +1 -1
  93. examples/run_google_slides_agent.py +2 -2
  94. examples/run_mcp_google_slides_client.py +1 -1
  95. examples/run_sqlite_agent.py +1 -1
  96. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  97. autobyteus/tools/ask_user_input.py +0 -40
  98. autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
  99. autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
  100. autobyteus-1.1.5.dist-info/METADATA +0 -161
  101. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
  102. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
  103. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0
autobyteus/agent/message/context_file.py

@@ -3,6 +3,7 @@ import os
 import logging
 from typing import Optional, Dict, Any
 from dataclasses import dataclass, field
+from urllib.parse import urlparse
 
 from .context_file_type import ContextFileType
 
@@ -12,10 +13,9 @@ logger = logging.getLogger(__name__)
 class ContextFile:
     """
     Represents a single context file provided to an agent.
-    This is a simple dataclass, deferring path validation and file access
-    to input processors.
+    The 'uri' can be a local file path or a network URL.
     """
-    path: str
+    uri: str
     file_type: ContextFileType = ContextFileType.UNKNOWN
     file_name: Optional[str] = None
     metadata: Dict[str, Any] = field(default_factory=dict)
@@ -25,33 +25,33 @@ class ContextFile:
         Called after the dataclass's __init__ method.
         Used here to infer file_name and file_type if not provided or UNKNOWN.
         """
-        if self.file_name is None and self.path:
+        if not isinstance(self.uri, str) or not self.uri:
+            raise TypeError(f"ContextFile uri must be a non-empty string, got {type(self.uri)}")
+
+        if self.file_name is None:
             try:
-                self.file_name = os.path.basename(self.path)
+                # Use urlparse to correctly handle both URLs and local paths
+                parsed_path = urlparse(self.uri).path
+                self.file_name = os.path.basename(parsed_path)
             except Exception as e:
-                logger.warning(f"Could not determine basename for path '{self.path}': {e}")
+                logger.warning(f"Could not determine basename for uri '{self.uri}': {e}")
                 self.file_name = "unknown_file"
 
-        if self.file_type == ContextFileType.UNKNOWN and self.path:
-            inferred_type = ContextFileType.from_path(self.path)
+        if self.file_type == ContextFileType.UNKNOWN:
+            inferred_type = ContextFileType.from_path(self.uri)
             if inferred_type != ContextFileType.UNKNOWN:
                 self.file_type = inferred_type
-                logger.debug(f"Inferred file type for '{self.path}' as {self.file_type.value}")
+                logger.debug(f"Inferred file type for '{self.uri}' as {self.file_type.value}")
             else:
-                logger.debug(f"Could not infer specific file type for '{self.path}', remaining UNKNOWN.")
-
-        # Ensure path is a string
-        if not isinstance(self.path, str):
-            # This ideally should be caught by type hints earlier, but as a runtime safeguard:
-            raise TypeError(f"ContextFile path must be a string, got {type(self.path)}")
-
+                logger.debug(f"Could not infer specific file type for '{self.uri}', remaining UNKNOWN.")
+
         if logger.isEnabledFor(logging.DEBUG):
-            logger.debug(f"ContextFile initialized: path='{self.path}', type='{self.file_type.value}', name='{self.file_name}'")
+            logger.debug(f"ContextFile initialized: uri='{self.uri}', type='{self.file_type.value}', name='{self.file_name}'")
 
     def to_dict(self) -> Dict[str, Any]:
         """Serializes the ContextFile to a dictionary."""
         return {
-            "path": self.path,
+            "uri": self.uri,
             "file_type": self.file_type.value,  # Serialize enum to its value
             "file_name": self.file_name,
             "metadata": self.metadata,
@@ -60,23 +60,23 @@ class ContextFile:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> 'ContextFile':
         """Deserializes a ContextFile from a dictionary."""
-        if not isinstance(data.get("path"), str):
-            raise ValueError("ContextFile 'path' in dictionary must be a string.")
-
+        if not isinstance(data.get("uri"), str):
+            raise ValueError("ContextFile 'uri' in dictionary must be a string.")
+
         file_type_str = data.get("file_type", ContextFileType.UNKNOWN.value)
         try:
            file_type = ContextFileType(file_type_str)
         except ValueError:
             logger.warning(f"Invalid file_type string '{file_type_str}' in ContextFile data. Defaulting to UNKNOWN.")
             file_type = ContextFileType.UNKNOWN
-
+
         return cls(
-            path=data["path"],
+            uri=data["uri"],
            file_type=file_type,
            file_name=data.get("file_name"),
            metadata=data.get("metadata", {})
         )
 
     def __repr__(self) -> str:
-        return (f"ContextFile(path='{self.path}', file_name='{self.file_name}', "
+        return (f"ContextFile(uri='{self.uri}', file_name='{self.file_name}', "
                 f"file_type='{self.file_type.value}', metadata_keys={list(self.metadata.keys())})")
autobyteus/agent/message/context_file_type.py

@@ -1,5 +1,6 @@
 from enum import Enum
 import os
+from urllib.parse import urlparse
 
 class ContextFileType(str, Enum):
     """
@@ -23,19 +24,25 @@ class ContextFileType(str, Enum):
     UNKNOWN = "unknown"  # Fallback for unrecognized types
 
     @classmethod
-    def from_path(cls, file_path: str) -> 'ContextFileType':
+    def from_path(cls, uri: str) -> 'ContextFileType':
         """
-        Infers the ContextFileType from a file path based on its extension.
+        Infers the ContextFileType from a file path or URL based on its extension.
         """
-        if not file_path or not isinstance(file_path, str):
+        if not uri or not isinstance(uri, str):
             return cls.UNKNOWN
-
-        _, extension = os.path.splitext(file_path.lower())
-
+
+        try:
+            # Parse the URI to handle both file paths and URLs gracefully
+            parsed_path = urlparse(uri).path
+            _, extension = os.path.splitext(parsed_path.lower())
+        except Exception:
+            # Fallback for malformed URIs
+            _, extension = os.path.splitext(uri.lower())
+
         if extension == ".txt":
             return cls.TEXT
         elif extension == ".md":
-            return cls.MARKDOWN
+            return cls.MARKDOWN
         elif extension == ".pdf":
             return cls.PDF
         elif extension == ".docx":
@@ -61,9 +68,23 @@ class ContextFileType(str, Enum):
         elif extension in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
             return cls.VIDEO
         elif extension in [".png", ".jpg", ".jpeg", ".gif", ".webp"]:
-            return cls.IMAGE
+            return cls.IMAGE
         else:
             return cls.UNKNOWN
 
+    @classmethod
+    def get_readable_text_types(cls) -> list['ContextFileType']:
+        """Returns a list of file types that can be read as plain text for context."""
+        return [
+            cls.TEXT,
+            cls.MARKDOWN,
+            cls.JSON,
+            cls.XML,
+            cls.HTML,
+            cls.PYTHON,
+            cls.JAVASCRIPT,
+            cls.CSV,
+        ]
+
     def __str__(self) -> str:
         return self.value
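
Editor's note: the practical effect of the urlparse change is that the extension is now taken from the URL path, so query strings and fragments no longer defeat type inference. A minimal standalone sketch of the same extraction logic (mirroring the diff rather than importing autobyteus):

    import os
    from urllib.parse import urlparse

    def infer_extension(uri: str) -> str:
        # urlparse(...).path drops query strings and fragments from URLs,
        # and passes plain local paths through unchanged.
        try:
            parsed_path = urlparse(uri).path
            _, extension = os.path.splitext(parsed_path.lower())
        except Exception:
            _, extension = os.path.splitext(uri.lower())
        return extension

    assert infer_extension("/tmp/report.PDF") == ".pdf"
    assert infer_extension("https://example.com/a/photo.jpg?size=large") == ".jpg"
    assert infer_extension("notes.md#section-2") == ".md"

Under 1.1.5, splitext on the raw string would have produced ".jpg?size=large" for the second case and fallen through to UNKNOWN.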
autobyteus/agent/message/multimodal_message_builder.py (new file)

@@ -0,0 +1,47 @@
+# file: autobyteus/autobyteus/agent/message/multimodal_message_builder.py
+import logging
+
+from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
+from autobyteus.agent.message.context_file_type import ContextFileType
+from autobyteus.llm.user_message import LLMUserMessage
+
+logger = logging.getLogger(__name__)
+
+def build_llm_user_message(agent_input_user_message: AgentInputUserMessage) -> LLMUserMessage:
+    """
+    Builds an LLMUserMessage from an AgentInputUserMessage by categorizing its context files.
+
+    This function iterates through the context files, sorting URIs for images, audio, and video
+    into the appropriate fields of the LLMUserMessage. It ignores other file types for now.
+
+    Args:
+        agent_input_user_message: The user input message containing content and context files.
+
+    Returns:
+        An LLMUserMessage ready to be sent to the LLM.
+    """
+    image_urls = []
+    audio_urls = []
+    video_urls = []
+
+    if agent_input_user_message.context_files:
+        for context_file in agent_input_user_message.context_files:
+            file_type = context_file.file_type
+            if file_type == ContextFileType.IMAGE:
+                image_urls.append(context_file.uri)
+            elif file_type == ContextFileType.AUDIO:
+                audio_urls.append(context_file.uri)
+            elif file_type == ContextFileType.VIDEO:
+                video_urls.append(context_file.uri)
+            else:
+                logger.debug(f"Ignoring non-media context file of type '{file_type.value}' during LLM message build: {context_file.uri}")
+
+    llm_user_message = LLMUserMessage(
+        content=agent_input_user_message.content,
+        image_urls=image_urls if image_urls else None,
+        audio_urls=audio_urls if audio_urls else None,
+        video_urls=video_urls if video_urls else None
+    )
+
+    logger.info(f"Built LLMUserMessage with {len(image_urls)} images, {len(audio_urls)} audio, {len(video_urls)} video files.")
+    return llm_user_message
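
Editor's note: a hedged usage sketch for the new builder. Only the .content and .context_files attributes are confirmed by the code above; the AgentInputUserMessage constructor arguments shown here are assumptions, and the URIs are illustrative.

    from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
    from autobyteus.agent.message.context_file import ContextFile
    from autobyteus.agent.message.context_file_type import ContextFileType
    from autobyteus.agent.message.multimodal_message_builder import build_llm_user_message

    # Media files are routed into the matching LLMUserMessage fields; anything
    # else (e.g. the markdown file) is skipped with a debug log.
    message = AgentInputUserMessage(
        content="Summarize the attached photo and recording.",
        context_files=[
            ContextFile(uri="https://example.com/photo.png"),   # inferred as IMAGE
            ContextFile(uri="/data/call.mp3", file_type=ContextFileType.AUDIO),
            ContextFile(uri="/data/notes.md"),                  # non-media: ignored
        ],
    )
    llm_message = build_llm_user_message(message)
    # llm_message.image_urls == ["https://example.com/photo.png"]
    # llm_message.audio_urls == ["/data/call.mp3"]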
autobyteus/agent/streaming/stream_event_payloads.py

@@ -20,12 +20,18 @@ class AssistantChunkData(BaseStreamPayload):
     reasoning: Optional[str] = None
     is_complete: bool
     usage: Optional[TokenUsage] = None
+    image_urls: Optional[List[str]] = None
+    audio_urls: Optional[List[str]] = None
+    video_urls: Optional[List[str]] = None
 
 
 class AssistantCompleteResponseData(BaseStreamPayload):
     content: str
     reasoning: Optional[str] = None
     usage: Optional[TokenUsage] = None
+    image_urls: Optional[List[str]] = None
+    audio_urls: Optional[List[str]] = None
+    video_urls: Optional[List[str]] = None
 
 class ToolInteractionLogEntryData(BaseStreamPayload):
     log_entry: str
@@ -102,14 +108,20 @@ def create_assistant_chunk_data(chunk_obj: Any) -> AssistantChunkData:
             content=str(getattr(chunk_obj, 'content', '')),
             reasoning=getattr(chunk_obj, 'reasoning', None),
             is_complete=bool(getattr(chunk_obj, 'is_complete', False)),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=getattr(chunk_obj, 'image_urls', None),
+            audio_urls=getattr(chunk_obj, 'audio_urls', None),
+            video_urls=getattr(chunk_obj, 'video_urls', None)
         )
     elif isinstance(chunk_obj, dict):
         return AssistantChunkData(
             content=str(chunk_obj.get('content', '')),
             reasoning=chunk_obj.get('reasoning', None),
             is_complete=bool(chunk_obj.get('is_complete', False)),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=chunk_obj.get('image_urls', None),
+            audio_urls=chunk_obj.get('audio_urls', None),
+            video_urls=chunk_obj.get('video_urls', None)
         )
     raise ValueError(f"Cannot create AssistantChunkData from {type(chunk_obj)}")
 
@@ -136,13 +148,19 @@ def create_assistant_complete_response_data(complete_resp_obj: Any) -> AssistantCompleteResponseData:
         return AssistantCompleteResponseData(
             content=str(getattr(complete_resp_obj, 'content', '')),
             reasoning=getattr(complete_resp_obj, 'reasoning', None),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=getattr(complete_resp_obj, 'image_urls', None),
+            audio_urls=getattr(complete_resp_obj, 'audio_urls', None),
+            video_urls=getattr(complete_resp_obj, 'video_urls', None)
         )
     elif isinstance(complete_resp_obj, dict):
         return AssistantCompleteResponseData(
             content=str(complete_resp_obj.get('content', '')),
             reasoning=complete_resp_obj.get('reasoning', None),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=complete_resp_obj.get('image_urls', None),
+            audio_urls=complete_resp_obj.get('audio_urls', None),
+            video_urls=complete_resp_obj.get('video_urls', None)
        )
     raise ValueError(f"Cannot create AssistantCompleteResponseData from {type(complete_resp_obj)}")
 
@@ -177,3 +195,4 @@ def create_system_task_notification_data(notification_data_dict: Any) -> SystemTaskNotificationData:
     if isinstance(notification_data_dict, dict):
         return SystemTaskNotificationData(**notification_data_dict)
     raise ValueError(f"Cannot create SystemTaskNotificationData from {type(notification_data_dict)}")
+
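
Editor's note: downstream consumers see the new media fields through the dict branch. A hypothetical call, assuming 1.1.7 and that the usage-parsing prelude (not shown in this hunk) tolerates an absent usage key, as the defaults above suggest:

    from autobyteus.agent.streaming.stream_event_payloads import create_assistant_chunk_data

    chunk = create_assistant_chunk_data({
        "content": "Here is the generated image.",
        "is_complete": True,
        "image_urls": ["https://example.com/generated.png"],
    })
    assert chunk.image_urls == ["https://example.com/generated.png"]
    assert chunk.audio_urls is None  # absent keys fall back to None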
autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py

@@ -47,6 +47,9 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
         llm_provider = None
         if context.llm_instance and context.llm_instance.model:
             llm_provider = context.llm_instance.model.provider
+
+        # Retrieve the override flag from the agent's configuration.
+        use_xml_tool_format = context.config.use_xml_tool_format
 
         # Generate the manifest string for the 'tools' variable.
         tools_manifest: str
@@ -59,10 +62,11 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
         ]
 
         try:
-            # Delegate manifest generation to the provider, which now handles all format logic.
+            # Delegate manifest generation to the provider, passing the override flag.
             tools_manifest = self._manifest_provider.provide(
                 tool_definitions=tool_definitions,
-                provider=llm_provider
+                provider=llm_provider,
+                use_xml_tool_format=use_xml_tool_format
             )
         except Exception as e:
             logger.exception(f"An unexpected error occurred during tool manifest generation for agent '{agent_id}': {e}")
autobyteus/agent/tool_invocation.py

@@ -2,7 +2,14 @@
 import uuid
 import hashlib
 import json
-from typing import Optional, Dict, Any
+import logging
+from typing import Optional, Dict, Any, List, TYPE_CHECKING
+from dataclasses import dataclass, field
+
+if TYPE_CHECKING:
+    from autobyteus.agent.events import ToolResultEvent
+
+logger = logging.getLogger(__name__)
 
 class ToolInvocation:
     def __init__(self, name: Optional[str] = None, arguments: Optional[Dict[str, Any]] = None, id: Optional[str] = None):
@@ -33,11 +40,15 @@ class ToolInvocation:
         """
         # Create a canonical representation of the arguments
         # sort_keys=True ensures that the order of keys doesn't change the hash
-        canonical_args = json.dumps(arguments, sort_keys=True, separators=(',', ':'))
+        # ensure_ascii=False is critical for cross-language compatibility with JS
+        canonical_args = json.dumps(arguments, sort_keys=True, separators=(',', ':'), ensure_ascii=False)
 
         # Create a string to hash
         hash_string = f"{name}:{canonical_args}"
 
+        # --- ADDED LOGGING ---
+        logger.debug(f"Generating tool invocation ID from hash_string: '{hash_string}'")
+
         # Use SHA256 for a robust hash
         sha256_hash = hashlib.sha256(hash_string.encode('utf-8')).hexdigest()
 
@@ -54,3 +65,17 @@ class ToolInvocation:
     def __repr__(self) -> str:
         return (f"ToolInvocation(id='{self.id}', name='{self.name}', "
                 f"arguments={self.arguments})")
+
+
+@dataclass
+class ToolInvocationTurn:
+    """
+    A data class to encapsulate the state of a multi-tool invocation turn.
+    Its existence in the agent's state signifies that a multi-tool turn is active.
+    """
+    invocations: List[ToolInvocation]
+    results: List['ToolResultEvent'] = field(default_factory=list)
+
+    def is_complete(self) -> bool:
+        """Checks if all expected tool results have been collected."""
+        return len(self.results) >= len(self.invocations)
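
Editor's note: the ID scheme is worth seeing end to end. A standalone sketch of the canonicalization shown above, illustrating why sort_keys and ensure_ascii=False matter; only the hashing steps are taken from the diff, and the real class may post-process the digest:

    import hashlib
    import json

    def deterministic_invocation_id(name: str, arguments: dict) -> str:
        # sort_keys=True: key order no longer changes the hash.
        # ensure_ascii=False: non-ASCII arguments hash the same as in JS,
        # where JSON.stringify does not escape them.
        canonical_args = json.dumps(arguments, sort_keys=True,
                                    separators=(',', ':'), ensure_ascii=False)
        hash_string = f"{name}:{canonical_args}"
        return hashlib.sha256(hash_string.encode('utf-8')).hexdigest()

    assert deterministic_invocation_id("search", {"q": "caffè", "n": 3}) == \
           deterministic_invocation_id("search", {"n": 3, "q": "caffè"})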
autobyteus/agent_team/agent_team_builder.py

@@ -45,6 +45,7 @@ class AgentTeamBuilder:
         self._coordinator_config: Optional[AgentConfig] = None
         self._added_node_names: Set[str] = set()
         self._task_notification_mode: TaskNotificationMode = TaskNotificationMode.AGENT_MANUAL_NOTIFICATION
+        self._use_xml_tool_format: Optional[bool] = None
         logger.info(f"AgentTeamBuilder initialized for team: '{self._name}'.")
 
     def add_agent_node(self, agent_config: AgentConfig, dependencies: Optional[List[NodeDefinition]] = None) -> 'AgentTeamBuilder':
@@ -142,6 +143,25 @@ class AgentTeamBuilder:
         logger.debug(f"Task notification mode set to '{mode.value}'.")
         return self
 
+    def set_use_xml_tool_format(self, use_xml: bool) -> 'AgentTeamBuilder':
+        """
+        Sets the team-level override for using XML tool format.
+
+        If set, this will override the setting on all individual agents within the team.
+
+        Args:
+            use_xml: If True, forces the team to use XML format for tool
+                     definitions and parsing.
+
+        Returns:
+            The builder instance for fluent chaining.
+        """
+        if not isinstance(use_xml, bool):
+            raise TypeError("use_xml must be a boolean.")
+        self._use_xml_tool_format = use_xml
+        logger.debug(f"Team-level XML tool format override set to '{use_xml}'.")
+        return self
+
     def build(self) -> AgentTeam:
         """
         Constructs and returns the final AgentTeam instance using the
@@ -175,7 +195,8 @@ class AgentTeamBuilder:
             role=self._role,
             nodes=tuple(final_nodes),
             coordinator_node=coordinator_node_instance,
-            task_notification_mode=self._task_notification_mode
+            task_notification_mode=self._task_notification_mode,
+            use_xml_tool_format=self._use_xml_tool_format
         )
 
         logger.info(f"AgentTeamConfig created successfully. Name: '{team_config.name}'. Total nodes: {len(final_nodes)}. Coordinator: '{coordinator_node_instance.name}'.")
autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py

@@ -15,8 +15,8 @@ logger = logging.getLogger(__name__)
 class AgentConfigurationPreparationStep(BaseAgentTeamBootstrapStep):
     """
     Bootstrap step to prepare the final, immutable configuration for every
-    agent in the team. It injects team-specific context and applies the final
-    coordinator prompt. It no longer injects tools.
+    agent in the team. It injects team-specific context, applies team-level
+    settings like tool format overrides, and prepares the final coordinator prompt.
     """
     async def execute(self, context: 'AgentTeamContext', phase_manager: 'AgentTeamPhaseManager') -> bool:
         team_id = context.team_id
@@ -44,6 +44,13 @@ class AgentConfigurationPreparationStep(BaseAgentTeamBootstrapStep):
 
             final_config = node_definition.copy()
 
+            # --- Team-level Setting Propagation ---
+            # If the team config specifies a tool format, it overrides any agent-level setting.
+            if context.config.use_xml_tool_format is not None:
+                final_config.use_xml_tool_format = context.config.use_xml_tool_format
+                logger.debug(f"Team '{team_id}': Applied team-level use_xml_tool_format={final_config.use_xml_tool_format} to agent '{unique_name}'.")
+
+
             # --- Shared Context Injection ---
             # The shared context is injected into the initial_custom_data dictionary,
             # which is then used by the AgentFactory to create the AgentRuntimeState.
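
Editor's note: the override is deliberately tri-state. A minimal sketch of the resolution rule applied above (the function name here is illustrative, not the library's):

    from typing import Optional

    def resolve_use_xml(team_override: Optional[bool], agent_setting: bool) -> bool:
        # None means "no team opinion": each agent keeps its own setting.
        return agent_setting if team_override is None else team_override

    assert resolve_use_xml(None, False) is False   # no override: agent wins
    assert resolve_use_xml(True, False) is True    # explicit override: team wins
    assert resolve_use_xml(False, True) is False   # False is an override too, not "unset"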
autobyteus/agent_team/context/agent_team_config.py

@@ -20,6 +20,7 @@ class AgentTeamConfig:
     coordinator_node: TeamNodeConfig
     role: Optional[str] = None
     task_notification_mode: TaskNotificationMode = TaskNotificationMode.AGENT_MANUAL_NOTIFICATION
+    use_xml_tool_format: Optional[bool] = None
 
     def __post_init__(self):
         if not self.name or not isinstance(self.name, str):
autobyteus/agent_team/context/agent_team_runtime_state.py

@@ -14,7 +14,6 @@ if TYPE_CHECKING:
     from autobyteus.agent_team.context.team_manager import TeamManager
     from autobyteus.agent_team.streaming.agent_event_multiplexer import AgentEventMultiplexer
     from autobyteus.task_management.base_task_board import BaseTaskBoard
-    from autobyteus.task_management.artifacts.artifact_manifest import ArtifactManifest
     from autobyteus.agent_team.task_notification.system_event_driven_agent_task_notifier import SystemEventDrivenAgentTaskNotifier
 
 logger = logging.getLogger(__name__)
@@ -40,7 +39,6 @@ class AgentTeamRuntimeState:
 
     # Dynamic planning and artifact state
     task_board: Optional['BaseTaskBoard'] = None
-    artifact_registry: Dict[str, 'ArtifactManifest'] = field(default_factory=dict)
 
     def __post_init__(self):
         if not self.team_id or not isinstance(self.team_id, str):
autobyteus/llm/api/autobyteus_llm.py

@@ -4,6 +4,7 @@ from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
 from autobyteus_llm_client.client import AutobyteusClient
 import logging
 import uuid
@@ -12,36 +13,35 @@ logger = logging.getLogger(__name__)
 
 class AutobyteusLLM(BaseLLM):
     def __init__(self, model: LLMModel, llm_config: LLMConfig):
-        # The host URL is now passed via the model object.
         if not model.host_url:
             raise ValueError("AutobyteusLLM requires a host_url to be set in its LLMModel object.")
 
         super().__init__(model=model, llm_config=llm_config)
 
-        # Instantiate the client with the specific host for this model.
         self.client = AutobyteusClient(server_url=self.model.host_url)
         self.conversation_id = str(uuid.uuid4())
         logger.info(f"AutobyteusLLM initialized for model '{self.model.model_identifier}' with conversation ID: {self.conversation_id}")
 
     async def _send_user_message_to_llm(
         self,
-        user_message: str,
-        image_urls: Optional[List[str]] = None,
+        user_message: LLMUserMessage,
         **kwargs
     ) -> CompleteResponse:
         self.add_user_message(user_message)
         try:
             response = await self.client.send_message(
                 conversation_id=self.conversation_id,
-                model_name=self.model.name,  # Use `name` as it's the original model name for the API
-                user_message=user_message,
-                image_urls=image_urls
+                model_name=self.model.name,
+                user_message=user_message.content,
+                image_urls=user_message.image_urls,
+                audio_urls=user_message.audio_urls,
+                video_urls=user_message.video_urls
             )
 
             assistant_message = response['response']
             self.add_assistant_message(assistant_message)
 
-            token_usage_data = response.get('token_usage', {})
+            token_usage_data = response.get('token_usage') or {}
             token_usage = TokenUsage(
                 prompt_tokens=token_usage_data.get('prompt_tokens', 0),
                 completion_tokens=token_usage_data.get('completion_tokens', 0),
@@ -59,8 +59,7 @@ class AutobyteusLLM(BaseLLM):
 
     async def _stream_user_message_to_llm(
         self,
-        user_message: str,
-        image_urls: Optional[List[str]] = None,
+        user_message: LLMUserMessage,
         **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
@@ -69,36 +68,38 @@ class AutobyteusLLM(BaseLLM):
         try:
             async for chunk in self.client.stream_message(
                 conversation_id=self.conversation_id,
-                model_name=self.model.name,  # Use `name` for the API call
-                user_message=user_message,
-                image_urls=image_urls
+                model_name=self.model.name,
+                user_message=user_message.content,
+                image_urls=user_message.image_urls,
+                audio_urls=user_message.audio_urls,
+                video_urls=user_message.video_urls
             ):
                 if 'error' in chunk:
                     raise RuntimeError(chunk['error'])
 
                 content = chunk.get('content', '')
-                complete_response += content
+                if content:
+                    complete_response += content
+
                 is_complete = chunk.get('is_complete', False)
-
-                # If this is the final chunk, include token usage
+                token_usage = None
                 if is_complete:
-                    token_usage = None
-                    if chunk.get('token_usage'):
-                        token_usage = TokenUsage(
-                            prompt_tokens=chunk['token_usage'].get('prompt_tokens', 0),
-                            completion_tokens=chunk['token_usage'].get('completion_tokens', 0),
-                            total_tokens=chunk['token_usage'].get('total_tokens', 0)
-                        )
-                    yield ChunkResponse(
-                        content=content,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-                else:
-                    yield ChunkResponse(
-                        content=content,
-                        is_complete=False
+                    token_usage_data = chunk.get('token_usage') or {}
+                    token_usage = TokenUsage(
+                        prompt_tokens=token_usage_data.get('prompt_tokens', 0),
+                        completion_tokens=token_usage_data.get('completion_tokens', 0),
+                        total_tokens=token_usage_data.get('total_tokens', 0)
                     )
+
+                yield ChunkResponse(
+                    content=content,
+                    reasoning=chunk.get('reasoning'),
+                    is_complete=is_complete,
+                    image_urls=chunk.get('image_urls', []),
+                    audio_urls=chunk.get('audio_urls', []),
+                    video_urls=chunk.get('video_urls', []),
+                    usage=token_usage
+                )
 
             self.add_assistant_message(complete_response)
         except Exception as e:
@@ -116,7 +117,6 @@ class AutobyteusLLM(BaseLLM):
         await self.client.close()
 
     async def _handle_error_cleanup(self):
-        """Handle cleanup operations after errors"""
         try:
             await self.cleanup()
         except Exception as cleanup_error:
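
Editor's note: the token_usage change is subtle but load-bearing. dict.get(key, default) only falls back when the key is absent, not when its value is null/None, so a server response of {"token_usage": null} crashed the 1.1.5 code. A three-line demonstration:

    response = {"response": "hi", "token_usage": None}   # usage present but null
    assert response.get("token_usage", {}) is None       # 1.1.5 form: default not used
    assert (response.get("token_usage") or {}) == {}     # 1.1.7 form: safe to call .get() on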
autobyteus/llm/api/bedrock_llm.py

@@ -9,10 +9,10 @@ from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.messages import MessageRole, Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
 
 class BedrockLLM(BaseLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        # Provide defaults if not specified
         if model is None:
             model = LLMModel.BEDROCK_CLAUDE_3_5_SONNET_API
         if llm_config is None:
@@ -43,14 +43,17 @@ class BedrockLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Failed to initialize Bedrock client: {str(e)}")
 
-    async def _send_user_message_to_llm(self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs) -> CompleteResponse:
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
 
+        # NOTE: This implementation does not yet support multimodal inputs for Bedrock.
+        # It will only send the text content.
+
         request_body = json.dumps({
             "anthropic_version": "bedrock-2023-05-31",
             "max_tokens": 1000,
             "temperature": 0,
-            "messages": [msg.to_dict() for msg in self.messages],
+            "messages": [msg.to_dict() for msg in self.messages if msg.role != MessageRole.SYSTEM],
             "system": self.system_message if self.system_message else ""
         })
 
@@ -79,6 +82,11 @@ class BedrockLLM(BaseLLM):
             raise ValueError(f"Bedrock API error: {error_code} - {error_message}")
         except Exception as e:
             raise ValueError(f"Error in Bedrock API call: {str(e)}")
-
+
+    async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
+        # Placeholder for future implementation
+        response = await self._send_user_message_to_llm(user_message, **kwargs)
+        yield ChunkResponse(content=response.content, is_complete=True, usage=response.usage)
+
     async def cleanup(self):
-        super().cleanup()
+        await super().cleanup()
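
Editor's note: the cleanup fix at the end is a classic async pitfall. Calling a coroutine method without await only creates a coroutine object, so the base class teardown never actually runs. A standalone sketch (it assumes BaseLLM.cleanup is async, which the fix itself implies):

    import asyncio

    class Base:
        async def cleanup(self):
            print("base cleanup ran")

    class Broken(Base):
        async def cleanup(self):
            super().cleanup()        # 1.1.5 bug: unawaited coroutine, prints nothing

    class Fixed(Base):
        async def cleanup(self):
            await super().cleanup()  # 1.1.7: base cleanup actually runs

    asyncio.run(Fixed().cleanup())   # -> "base cleanup ran"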