autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/context/agent_config.py +6 -1
- autobyteus/agent/context/agent_runtime_state.py +7 -1
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
- autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
- autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
- autobyteus/agent/message/__init__.py +7 -5
- autobyteus/agent/message/agent_input_user_message.py +6 -16
- autobyteus/agent/message/context_file.py +24 -24
- autobyteus/agent/message/context_file_type.py +29 -8
- autobyteus/agent/message/multimodal_message_builder.py +47 -0
- autobyteus/agent/streaming/stream_event_payloads.py +23 -4
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
- autobyteus/agent/tool_invocation.py +27 -2
- autobyteus/agent_team/agent_team_builder.py +22 -1
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
- autobyteus/agent_team/context/agent_team_config.py +1 -0
- autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
- autobyteus/llm/api/autobyteus_llm.py +33 -33
- autobyteus/llm/api/bedrock_llm.py +13 -5
- autobyteus/llm/api/claude_llm.py +13 -27
- autobyteus/llm/api/gemini_llm.py +108 -42
- autobyteus/llm/api/groq_llm.py +4 -3
- autobyteus/llm/api/mistral_llm.py +97 -51
- autobyteus/llm/api/nvidia_llm.py +6 -5
- autobyteus/llm/api/ollama_llm.py +37 -12
- autobyteus/llm/api/openai_compatible_llm.py +91 -91
- autobyteus/llm/autobyteus_provider.py +1 -1
- autobyteus/llm/base_llm.py +42 -139
- autobyteus/llm/extensions/base_extension.py +6 -6
- autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
- autobyteus/llm/llm_factory.py +131 -61
- autobyteus/llm/ollama_provider_resolver.py +1 -0
- autobyteus/llm/providers.py +1 -0
- autobyteus/llm/token_counter/token_counter_factory.py +3 -1
- autobyteus/llm/user_message.py +43 -35
- autobyteus/llm/utils/llm_config.py +34 -18
- autobyteus/llm/utils/media_payload_formatter.py +99 -0
- autobyteus/llm/utils/messages.py +32 -25
- autobyteus/llm/utils/response_types.py +9 -3
- autobyteus/llm/utils/token_usage.py +6 -5
- autobyteus/multimedia/__init__.py +31 -0
- autobyteus/multimedia/audio/__init__.py +11 -0
- autobyteus/multimedia/audio/api/__init__.py +4 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
- autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
- autobyteus/multimedia/audio/audio_client_factory.py +120 -0
- autobyteus/multimedia/audio/audio_model.py +97 -0
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
- autobyteus/multimedia/audio/base_audio_client.py +40 -0
- autobyteus/multimedia/image/__init__.py +11 -0
- autobyteus/multimedia/image/api/__init__.py +9 -0
- autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
- autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
- autobyteus/multimedia/image/api/openai_image_client.py +142 -0
- autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
- autobyteus/multimedia/image/base_image_client.py +67 -0
- autobyteus/multimedia/image/image_client_factory.py +118 -0
- autobyteus/multimedia/image/image_model.py +97 -0
- autobyteus/multimedia/providers.py +5 -0
- autobyteus/multimedia/runtimes.py +8 -0
- autobyteus/multimedia/utils/__init__.py +10 -0
- autobyteus/multimedia/utils/api_utils.py +19 -0
- autobyteus/multimedia/utils/multimedia_config.py +29 -0
- autobyteus/multimedia/utils/response_types.py +13 -0
- autobyteus/task_management/tools/publish_task_plan.py +4 -16
- autobyteus/task_management/tools/update_task_status.py +4 -19
- autobyteus/tools/__init__.py +5 -4
- autobyteus/tools/base_tool.py +98 -29
- autobyteus/tools/browser/standalone/__init__.py +0 -1
- autobyteus/tools/google_search.py +149 -0
- autobyteus/tools/mcp/schema_mapper.py +29 -71
- autobyteus/tools/multimedia/__init__.py +8 -0
- autobyteus/tools/multimedia/audio_tools.py +116 -0
- autobyteus/tools/multimedia/image_tools.py +186 -0
- autobyteus/tools/parameter_schema.py +82 -89
- autobyteus/tools/pydantic_schema_converter.py +81 -0
- autobyteus/tools/tool_category.py +1 -0
- autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
- autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
- autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
- autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
- autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
- autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
- autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
- autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
- autobyteus-1.1.7.dist-info/METADATA +204 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
- examples/run_browser_agent.py +1 -1
- examples/run_google_slides_agent.py +2 -2
- examples/run_mcp_google_slides_client.py +1 -1
- examples/run_sqlite_agent.py +1 -1
- autobyteus/llm/utils/image_payload_formatter.py +0 -89
- autobyteus/tools/ask_user_input.py +0 -40
- autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
- autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
- autobyteus-1.1.5.dist-info/METADATA +0 -161
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0
autobyteus/agent/message/context_file.py

@@ -3,6 +3,7 @@ import os
 import logging
 from typing import Optional, Dict, Any
 from dataclasses import dataclass, field
+from urllib.parse import urlparse

 from .context_file_type import ContextFileType

@@ -12,10 +13,9 @@ logger = logging.getLogger(__name__)
 class ContextFile:
     """
     Represents a single context file provided to an agent.
-
-    to input processors.
+    The 'uri' can be a local file path or a network URL.
     """
-    path: str
+    uri: str
     file_type: ContextFileType = ContextFileType.UNKNOWN
     file_name: Optional[str] = None
     metadata: Dict[str, Any] = field(default_factory=dict)
@@ -25,33 +25,33 @@ class ContextFile:
         Called after the dataclass's __init__ method.
         Used here to infer file_name and file_type if not provided or UNKNOWN.
         """
-        if self.file_name is None:
+        if not isinstance(self.uri, str) or not self.uri:
+            raise TypeError(f"ContextFile uri must be a non-empty string, got {type(self.uri)}")
+
+        if self.file_name is None:
             try:
-                self.file_name = os.path.basename(self.path)
+                # Use urlparse to correctly handle both URLs and local paths
+                parsed_path = urlparse(self.uri).path
+                self.file_name = os.path.basename(parsed_path)
             except Exception as e:
-                logger.warning(f"Could not determine basename for path '{self.path}': {e}")
+                logger.warning(f"Could not determine basename for uri '{self.uri}': {e}")
                 self.file_name = "unknown_file"

-        if self.file_type == ContextFileType.UNKNOWN:
-            inferred_type = ContextFileType.from_path(self.path)
+        if self.file_type == ContextFileType.UNKNOWN:
+            inferred_type = ContextFileType.from_path(self.uri)
             if inferred_type != ContextFileType.UNKNOWN:
                 self.file_type = inferred_type
-                logger.debug(f"Inferred file type for '{self.path}' as {self.file_type.value}")
+                logger.debug(f"Inferred file type for '{self.uri}' as {self.file_type.value}")
             else:
-                logger.debug(f"Could not infer specific file type for '{self.path}', remaining UNKNOWN.")
-
-        # Ensure path is a string
-        if not isinstance(self.path, str):
-            # This ideally should be caught by type hints earlier, but as a runtime safeguard:
-            raise TypeError(f"ContextFile path must be a string, got {type(self.path)}")
-
+                logger.debug(f"Could not infer specific file type for '{self.uri}', remaining UNKNOWN.")
+
         if logger.isEnabledFor(logging.DEBUG):
-            logger.debug(f"ContextFile initialized: path='{self.path}', type='{self.file_type.value}', name='{self.file_name}'")
+            logger.debug(f"ContextFile initialized: uri='{self.uri}', type='{self.file_type.value}', name='{self.file_name}'")

     def to_dict(self) -> Dict[str, Any]:
         """Serializes the ContextFile to a dictionary."""
         return {
-            "path": self.path,
+            "uri": self.uri,
             "file_type": self.file_type.value, # Serialize enum to its value
             "file_name": self.file_name,
             "metadata": self.metadata,
@@ -60,23 +60,23 @@ class ContextFile:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> 'ContextFile':
         """Deserializes a ContextFile from a dictionary."""
-        if not isinstance(data.get("path"), str):
-            raise ValueError("ContextFile 'path' in dictionary must be a string.")
-
+        if not isinstance(data.get("uri"), str):
+            raise ValueError("ContextFile 'uri' in dictionary must be a string.")
+
         file_type_str = data.get("file_type", ContextFileType.UNKNOWN.value)
         try:
             file_type = ContextFileType(file_type_str)
         except ValueError:
             logger.warning(f"Invalid file_type string '{file_type_str}' in ContextFile data. Defaulting to UNKNOWN.")
             file_type = ContextFileType.UNKNOWN
-
+
         return cls(
-            path=data["path"],
+            uri=data["uri"],
             file_type=file_type,
             file_name=data.get("file_name"),
             metadata=data.get("metadata", {})
         )

     def __repr__(self) -> str:
-        return (f"ContextFile(path='{self.path}', file_name='{self.file_name}', "
+        return (f"ContextFile(uri='{self.uri}', file_name='{self.file_name}', "
                 f"file_type='{self.file_type.value}', metadata_keys={list(self.metadata.keys())})")
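A minimal sketch of how the reworked ContextFile behaves after the path-to-uri rename (illustrative only; the example URI is hypothetical):

    from autobyteus.agent.message.context_file import ContextFile
    from autobyteus.agent.message.context_file_type import ContextFileType

    # file_name and file_type are inferred in __post_init__ via urlparse,
    # so a URL with a path component behaves like a local path.
    remote = ContextFile(uri="https://example.com/assets/report.pdf")
    assert remote.file_name == "report.pdf"
    assert remote.file_type == ContextFileType.PDF

    # Round-trip through the serializers shown above.
    restored = ContextFile.from_dict(remote.to_dict())
    assert restored.uri == remote.uri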
autobyteus/agent/message/context_file_type.py

@@ -1,5 +1,6 @@
 from enum import Enum
 import os
+from urllib.parse import urlparse

 class ContextFileType(str, Enum):
     """
@@ -23,19 +24,25 @@ class ContextFileType(str, Enum):
     UNKNOWN = "unknown" # Fallback for unrecognized types

     @classmethod
-    def from_path(cls, path: str) -> 'ContextFileType':
+    def from_path(cls, uri: str) -> 'ContextFileType':
         """
-        Infers the ContextFileType from a file path based on its extension.
+        Infers the ContextFileType from a file path or URL based on its extension.
         """
-        if not path or not isinstance(path, str):
+        if not uri or not isinstance(uri, str):
             return cls.UNKNOWN
-
-        _, extension = os.path.splitext(path.lower())
-
+
+        try:
+            # Parse the URI to handle both file paths and URLs gracefully
+            parsed_path = urlparse(uri).path
+            _, extension = os.path.splitext(parsed_path.lower())
+        except Exception:
+            # Fallback for malformed URIs
+            _, extension = os.path.splitext(uri.lower())
+
         if extension == ".txt":
             return cls.TEXT
         elif extension == ".md":
-            return cls.MARKDOWN
+            return cls.MARKDOWN
         elif extension == ".pdf":
             return cls.PDF
         elif extension == ".docx":
@@ -61,9 +68,23 @@ class ContextFileType(str, Enum):
         elif extension in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
             return cls.VIDEO
         elif extension in [".png", ".jpg", ".jpeg", ".gif", ".webp"]:
-            return cls.IMAGE
+            return cls.IMAGE
         else:
             return cls.UNKNOWN

+    @classmethod
+    def get_readable_text_types(cls) -> list['ContextFileType']:
+        """Returns a list of file types that can be read as plain text for context."""
+        return [
+            cls.TEXT,
+            cls.MARKDOWN,
+            cls.JSON,
+            cls.XML,
+            cls.HTML,
+            cls.PYTHON,
+            cls.JAVASCRIPT,
+            cls.CSV,
+        ]
+
     def __str__(self) -> str:
         return self.value
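The urlparse step is what lets from_path cope with query strings; a quick illustration against the logic above (the URLs are hypothetical):

    from autobyteus.agent.message.context_file_type import ContextFileType

    # urlparse(uri).path drops "?token=abc", so splitext sees ".png".
    assert ContextFileType.from_path("https://cdn.example.com/img/photo.png?token=abc") == ContextFileType.IMAGE
    # Unrecognized extensions still fall through to UNKNOWN.
    assert ContextFileType.from_path("archive.xyz") == ContextFileType.UNKNOWN
    # The new helper enumerates types safe to inline as plain text.
    assert ContextFileType.MARKDOWN in ContextFileType.get_readable_text_types()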
autobyteus/agent/message/multimodal_message_builder.py (new file)

@@ -0,0 +1,47 @@
+# file: autobyteus/autobyteus/agent/message/multimodal_message_builder.py
+import logging
+
+from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
+from autobyteus.agent.message.context_file_type import ContextFileType
+from autobyteus.llm.user_message import LLMUserMessage
+
+logger = logging.getLogger(__name__)
+
+def build_llm_user_message(agent_input_user_message: AgentInputUserMessage) -> LLMUserMessage:
+    """
+    Builds an LLMUserMessage from an AgentInputUserMessage by categorizing its context files.
+
+    This function iterates through the context files, sorting URIs for images, audio, and video
+    into the appropriate fields of the LLMUserMessage. It ignores other file types for now.
+
+    Args:
+        agent_input_user_message: The user input message containing content and context files.
+
+    Returns:
+        An LLMUserMessage ready to be sent to the LLM.
+    """
+    image_urls = []
+    audio_urls = []
+    video_urls = []
+
+    if agent_input_user_message.context_files:
+        for context_file in agent_input_user_message.context_files:
+            file_type = context_file.file_type
+            if file_type == ContextFileType.IMAGE:
+                image_urls.append(context_file.uri)
+            elif file_type == ContextFileType.AUDIO:
+                audio_urls.append(context_file.uri)
+            elif file_type == ContextFileType.VIDEO:
+                video_urls.append(context_file.uri)
+            else:
+                logger.debug(f"Ignoring non-media context file of type '{file_type.value}' during LLM message build: {context_file.uri}")
+
+    llm_user_message = LLMUserMessage(
+        content=agent_input_user_message.content,
+        image_urls=image_urls if image_urls else None,
+        audio_urls=audio_urls if audio_urls else None,
+        video_urls=video_urls if video_urls else None
+    )
+
+    logger.info(f"Built LLMUserMessage with {len(image_urls)} images, {len(audio_urls)} audio, {len(video_urls)} video files.")
+    return llm_user_message
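Sketch of the new builder in use; the AgentInputUserMessage constructor arguments are assumed from context (its exact signature is not shown in this diff), and the URL is hypothetical:

    from autobyteus.agent.message.agent_input_user_message import AgentInputUserMessage
    from autobyteus.agent.message.context_file import ContextFile
    from autobyteus.agent.message.multimodal_message_builder import build_llm_user_message

    msg = AgentInputUserMessage(
        content="Describe this screenshot.",
        context_files=[ContextFile(uri="https://example.com/shot.png")],
    )

    llm_msg = build_llm_user_message(msg)
    # The .png file is inferred as IMAGE and routed to image_urls;
    # text-like context files are skipped by this builder.
    assert llm_msg.image_urls == ["https://example.com/shot.png"]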
autobyteus/agent/streaming/stream_event_payloads.py

@@ -20,12 +20,18 @@ class AssistantChunkData(BaseStreamPayload):
     reasoning: Optional[str] = None
     is_complete: bool
     usage: Optional[TokenUsage] = None
+    image_urls: Optional[List[str]] = None
+    audio_urls: Optional[List[str]] = None
+    video_urls: Optional[List[str]] = None


 class AssistantCompleteResponseData(BaseStreamPayload):
     content: str
     reasoning: Optional[str] = None
     usage: Optional[TokenUsage] = None
+    image_urls: Optional[List[str]] = None
+    audio_urls: Optional[List[str]] = None
+    video_urls: Optional[List[str]] = None

 class ToolInteractionLogEntryData(BaseStreamPayload):
     log_entry: str
@@ -102,14 +108,20 @@ def create_assistant_chunk_data(chunk_obj: Any) -> AssistantChunkData:
             content=str(getattr(chunk_obj, 'content', '')),
             reasoning=getattr(chunk_obj, 'reasoning', None),
             is_complete=bool(getattr(chunk_obj, 'is_complete', False)),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=getattr(chunk_obj, 'image_urls', None),
+            audio_urls=getattr(chunk_obj, 'audio_urls', None),
+            video_urls=getattr(chunk_obj, 'video_urls', None)
         )
     elif isinstance(chunk_obj, dict):
         return AssistantChunkData(
             content=str(chunk_obj.get('content', '')),
             reasoning=chunk_obj.get('reasoning', None),
             is_complete=bool(chunk_obj.get('is_complete', False)),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=chunk_obj.get('image_urls', None),
+            audio_urls=chunk_obj.get('audio_urls', None),
+            video_urls=chunk_obj.get('video_urls', None)
         )
     raise ValueError(f"Cannot create AssistantChunkData from {type(chunk_obj)}")

@@ -136,13 +148,19 @@ def create_assistant_complete_response_data(complete_resp_obj: Any) -> AssistantCompleteResponseData:
         return AssistantCompleteResponseData(
             content=str(getattr(complete_resp_obj, 'content', '')),
             reasoning=getattr(complete_resp_obj, 'reasoning', None),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=getattr(complete_resp_obj, 'image_urls', None),
+            audio_urls=getattr(complete_resp_obj, 'audio_urls', None),
+            video_urls=getattr(complete_resp_obj, 'video_urls', None)
         )
     elif isinstance(complete_resp_obj, dict):
         return AssistantCompleteResponseData(
             content=str(complete_resp_obj.get('content', '')),
             reasoning=complete_resp_obj.get('reasoning', None),
-            usage=parsed_usage
+            usage=parsed_usage,
+            image_urls=complete_resp_obj.get('image_urls', None),
+            audio_urls=complete_resp_obj.get('audio_urls', None),
+            video_urls=complete_resp_obj.get('video_urls', None)
         )
     raise ValueError(f"Cannot create AssistantCompleteResponseData from {type(complete_resp_obj)}")

@@ -177,3 +195,4 @@ def create_system_task_notification_data(notification_data_dict: Any) -> SystemTaskNotificationData:
     if isinstance(notification_data_dict, dict):
         return SystemTaskNotificationData(**notification_data_dict)
     raise ValueError(f"Cannot create SystemTaskNotificationData from {type(notification_data_dict)}")
+
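With the new optional media fields, a dict-shaped chunk passes its URL lists straight through; a small sketch (the payload values are made up):

    from autobyteus.agent.streaming.stream_event_payloads import create_assistant_chunk_data

    chunk = {
        "content": "Here is the generated image.",
        "is_complete": True,
        "image_urls": ["https://example.com/out/gen-1.png"],
    }

    data = create_assistant_chunk_data(chunk)
    assert data.image_urls == ["https://example.com/out/gen-1.png"]
    assert data.audio_urls is None  # absent keys default to None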
autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py

@@ -47,6 +47,9 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
         llm_provider = None
         if context.llm_instance and context.llm_instance.model:
             llm_provider = context.llm_instance.model.provider
+
+        # Retrieve the override flag from the agent's configuration.
+        use_xml_tool_format = context.config.use_xml_tool_format

         # Generate the manifest string for the 'tools' variable.
         tools_manifest: str
@@ -59,10 +62,11 @@ class ToolManifestInjectorProcessor(BaseSystemPromptProcessor):
         ]

         try:
-            # Delegate manifest generation to the provider,
+            # Delegate manifest generation to the provider, passing the override flag.
             tools_manifest = self._manifest_provider.provide(
                 tool_definitions=tool_definitions,
-                provider=llm_provider
+                provider=llm_provider,
+                use_xml_tool_format=use_xml_tool_format
             )
         except Exception as e:
             logger.exception(f"An unexpected error occurred during tool manifest generation for agent '{agent_id}': {e}")
autobyteus/agent/tool_invocation.py

@@ -2,7 +2,14 @@
 import uuid
 import hashlib
 import json
-from typing import Optional, Dict, Any
+import logging
+from typing import Optional, Dict, Any, List, TYPE_CHECKING
+from dataclasses import dataclass, field
+
+if TYPE_CHECKING:
+    from autobyteus.agent.events import ToolResultEvent
+
+logger = logging.getLogger(__name__)

 class ToolInvocation:
     def __init__(self, name: Optional[str] = None, arguments: Optional[Dict[str, Any]] = None, id: Optional[str] = None):
@@ -33,11 +40,15 @@ class ToolInvocation:
         """
         # Create a canonical representation of the arguments
         # sort_keys=True ensures that the order of keys doesn't change the hash
-        canonical_args = json.dumps(arguments, sort_keys=True)
+        # ensure_ascii=False is critical for cross-language compatibility with JS
+        canonical_args = json.dumps(arguments, sort_keys=True, separators=(',', ':'), ensure_ascii=False)

         # Create a string to hash
         hash_string = f"{name}:{canonical_args}"

+        # --- ADDED LOGGING ---
+        logger.debug(f"Generating tool invocation ID from hash_string: '{hash_string}'")
+
         # Use SHA256 for a robust hash
         sha256_hash = hashlib.sha256(hash_string.encode('utf-8')).hexdigest()

@@ -54,3 +65,17 @@ class ToolInvocation:
     def __repr__(self) -> str:
         return (f"ToolInvocation(id='{self.id}', name='{self.name}', "
                 f"arguments={self.arguments})")
+
+
+@dataclass
+class ToolInvocationTurn:
+    """
+    A data class to encapsulate the state of a multi-tool invocation turn.
+    Its existence in the agent's state signifies that a multi-tool turn is active.
+    """
+    invocations: List[ToolInvocation]
+    results: List['ToolResultEvent'] = field(default_factory=list)
+
+    def is_complete(self) -> bool:
+        """Checks if all expected tool results have been collected."""
+        return len(self.results) >= len(self.invocations)
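The canonical json.dumps makes the generated invocation ID independent of argument order; a sketch assuming the ID is derived from this hash when none is supplied:

    from autobyteus.agent.tool_invocation import ToolInvocation, ToolInvocationTurn

    a = ToolInvocation(name="search", arguments={"query": "weather", "limit": 5})
    b = ToolInvocation(name="search", arguments={"limit": 5, "query": "weather"})
    assert a.id == b.id  # sort_keys=True canonicalizes key order

    # The new turn container tracks a multi-tool round until every result arrives.
    turn = ToolInvocationTurn(invocations=[a, b])
    assert not turn.is_complete()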
autobyteus/agent_team/agent_team_builder.py

@@ -45,6 +45,7 @@ class AgentTeamBuilder:
         self._coordinator_config: Optional[AgentConfig] = None
         self._added_node_names: Set[str] = set()
         self._task_notification_mode: TaskNotificationMode = TaskNotificationMode.AGENT_MANUAL_NOTIFICATION
+        self._use_xml_tool_format: Optional[bool] = None
         logger.info(f"AgentTeamBuilder initialized for team: '{self._name}'.")

     def add_agent_node(self, agent_config: AgentConfig, dependencies: Optional[List[NodeDefinition]] = None) -> 'AgentTeamBuilder':
@@ -142,6 +143,25 @@ class AgentTeamBuilder:
         logger.debug(f"Task notification mode set to '{mode.value}'.")
         return self

+    def set_use_xml_tool_format(self, use_xml: bool) -> 'AgentTeamBuilder':
+        """
+        Sets the team-level override for using XML tool format.
+
+        If set, this will override the setting on all individual agents within the team.
+
+        Args:
+            use_xml: If True, forces the team to use XML format for tool
+                     definitions and parsing.
+
+        Returns:
+            The builder instance for fluent chaining.
+        """
+        if not isinstance(use_xml, bool):
+            raise TypeError("use_xml must be a boolean.")
+        self._use_xml_tool_format = use_xml
+        logger.debug(f"Team-level XML tool format override set to '{use_xml}'.")
+        return self
+
     def build(self) -> AgentTeam:
         """
         Constructs and returns the final AgentTeam instance using the
@@ -175,7 +195,8 @@ class AgentTeamBuilder:
             role=self._role,
             nodes=tuple(final_nodes),
             coordinator_node=coordinator_node_instance,
-            task_notification_mode=self._task_notification_mode
+            task_notification_mode=self._task_notification_mode,
+            use_xml_tool_format=self._use_xml_tool_format
         )

         logger.info(f"AgentTeamConfig created successfully. Name: '{team_config.name}'. Total nodes: {len(final_nodes)}. Coordinator: '{coordinator_node_instance.name}'.")
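Typical fluent use of the new team-level override (a sketch; the builder's constructor arguments, the node wiring, and the agent configs are assumed, not shown in this diff):

    team = (
        AgentTeamBuilder(name="research_team")   # constructor signature assumed
        .add_agent_node(worker_config)           # worker_config: a prepared AgentConfig
        .set_use_xml_tool_format(True)           # overrides every agent's own setting
        .build()
    )
    # Passing a non-bool raises TypeError("use_xml must be a boolean.")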
autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py

@@ -15,8 +15,8 @@ logger = logging.getLogger(__name__)
 class AgentConfigurationPreparationStep(BaseAgentTeamBootstrapStep):
     """
     Bootstrap step to prepare the final, immutable configuration for every
-    agent in the team. It injects team-specific context
-
+    agent in the team. It injects team-specific context, applies team-level
+    settings like tool format overrides, and prepares the final coordinator prompt.
     """
     async def execute(self, context: 'AgentTeamContext', phase_manager: 'AgentTeamPhaseManager') -> bool:
         team_id = context.team_id
@@ -44,6 +44,13 @@ class AgentConfigurationPreparationStep(BaseAgentTeamBootstrapStep):

             final_config = node_definition.copy()

+            # --- Team-level Setting Propagation ---
+            # If the team config specifies a tool format, it overrides any agent-level setting.
+            if context.config.use_xml_tool_format is not None:
+                final_config.use_xml_tool_format = context.config.use_xml_tool_format
+                logger.debug(f"Team '{team_id}': Applied team-level use_xml_tool_format={final_config.use_xml_tool_format} to agent '{unique_name}'.")
+
+
             # --- Shared Context Injection ---
             # The shared context is injected into the initial_custom_data dictionary,
             # which is then used by the AgentFactory to create the AgentRuntimeState.
autobyteus/agent_team/context/agent_team_config.py

@@ -20,6 +20,7 @@ class AgentTeamConfig:
     coordinator_node: TeamNodeConfig
     role: Optional[str] = None
     task_notification_mode: TaskNotificationMode = TaskNotificationMode.AGENT_MANUAL_NOTIFICATION
+    use_xml_tool_format: Optional[bool] = None

     def __post_init__(self):
         if not self.name or not isinstance(self.name, str):
autobyteus/agent_team/context/agent_team_runtime_state.py

@@ -14,7 +14,6 @@ if TYPE_CHECKING:
     from autobyteus.agent_team.context.team_manager import TeamManager
     from autobyteus.agent_team.streaming.agent_event_multiplexer import AgentEventMultiplexer
     from autobyteus.task_management.base_task_board import BaseTaskBoard
-    from autobyteus.task_management.artifacts.artifact_manifest import ArtifactManifest
     from autobyteus.agent_team.task_notification.system_event_driven_agent_task_notifier import SystemEventDrivenAgentTaskNotifier

 logger = logging.getLogger(__name__)
@@ -40,7 +39,6 @@ class AgentTeamRuntimeState:

     # Dynamic planning and artifact state
     task_board: Optional['BaseTaskBoard'] = None
-    artifact_registry: Dict[str, 'ArtifactManifest'] = field(default_factory=dict)

     def __post_init__(self):
         if not self.team_id or not isinstance(self.team_id, str):
autobyteus/llm/api/autobyteus_llm.py

@@ -4,6 +4,7 @@ from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
 from autobyteus_llm_client.client import AutobyteusClient
 import logging
 import uuid
@@ -12,36 +13,35 @@ logger = logging.getLogger(__name__)

 class AutobyteusLLM(BaseLLM):
     def __init__(self, model: LLMModel, llm_config: LLMConfig):
-        # The host URL is now passed via the model object.
         if not model.host_url:
             raise ValueError("AutobyteusLLM requires a host_url to be set in its LLMModel object.")

         super().__init__(model=model, llm_config=llm_config)

-        # Instantiate the client with the specific host for this model.
         self.client = AutobyteusClient(server_url=self.model.host_url)
         self.conversation_id = str(uuid.uuid4())
         logger.info(f"AutobyteusLLM initialized for model '{self.model.model_identifier}' with conversation ID: {self.conversation_id}")

     async def _send_user_message_to_llm(
         self,
-        user_message: str,
-        image_urls: Optional[List[str]] = None,
+        user_message: LLMUserMessage,
         **kwargs
     ) -> CompleteResponse:
         self.add_user_message(user_message)
         try:
             response = await self.client.send_message(
                 conversation_id=self.conversation_id,
-                model_name=self.model.name,
-                user_message=user_message,
-                image_urls=image_urls
+                model_name=self.model.name,
+                user_message=user_message.content,
+                image_urls=user_message.image_urls,
+                audio_urls=user_message.audio_urls,
+                video_urls=user_message.video_urls
             )

             assistant_message = response['response']
             self.add_assistant_message(assistant_message)

-            token_usage_data = response.get('token_usage', {})
+            token_usage_data = response.get('token_usage') or {}
             token_usage = TokenUsage(
                 prompt_tokens=token_usage_data.get('prompt_tokens', 0),
                 completion_tokens=token_usage_data.get('completion_tokens', 0),
@@ -59,8 +59,7 @@ class AutobyteusLLM(BaseLLM):

     async def _stream_user_message_to_llm(
         self,
-        user_message: str,
-        image_urls: Optional[List[str]] = None,
+        user_message: LLMUserMessage,
         **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
@@ -69,36 +68,38 @@ class AutobyteusLLM(BaseLLM):
         try:
             async for chunk in self.client.stream_message(
                 conversation_id=self.conversation_id,
-                model_name=self.model.name,
-                user_message=user_message,
-                image_urls=image_urls
+                model_name=self.model.name,
+                user_message=user_message.content,
+                image_urls=user_message.image_urls,
+                audio_urls=user_message.audio_urls,
+                video_urls=user_message.video_urls
             ):
                 if 'error' in chunk:
                     raise RuntimeError(chunk['error'])

                 content = chunk.get('content', '')
-                complete_response += content
+                if content:
+                    complete_response += content
+
                 is_complete = chunk.get('is_complete', False)
-
-                # If this is the final chunk, include token usage
+                token_usage = None
                 if is_complete:
-                    token_usage = TokenUsage(
-                        prompt_tokens=chunk['token_usage'].get('prompt_tokens', 0),
-                        completion_tokens=chunk['token_usage'].get('completion_tokens', 0),
-                        total_tokens=chunk['token_usage'].get('total_tokens', 0)
-                    )
-                    yield ChunkResponse(
-                        content=content,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-                else:
-                    yield ChunkResponse(
-                        content=content,
-                        is_complete=False
+                    token_usage_data = chunk.get('token_usage') or {}
+                    token_usage = TokenUsage(
+                        prompt_tokens=token_usage_data.get('prompt_tokens', 0),
+                        completion_tokens=token_usage_data.get('completion_tokens', 0),
+                        total_tokens=token_usage_data.get('total_tokens', 0)
                     )
+
+                yield ChunkResponse(
+                    content=content,
+                    reasoning=chunk.get('reasoning'),
+                    is_complete=is_complete,
+                    image_urls=chunk.get('image_urls', []),
+                    audio_urls=chunk.get('audio_urls', []),
+                    video_urls=chunk.get('video_urls', []),
+                    usage=token_usage
+                )

             self.add_assistant_message(complete_response)
         except Exception as e:
@@ -116,7 +117,6 @@ class AutobyteusLLM(BaseLLM):
         await self.client.close()

     async def _handle_error_cleanup(self):
-        """Handle cleanup operations after errors"""
         try:
             await self.cleanup()
         except Exception as cleanup_error:
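Consuming the reworked stream; a sketch that calls the internal method directly for illustration (the public wrapper on BaseLLM is not shown in this diff, and the image URL is hypothetical):

    from autobyteus.llm.user_message import LLMUserMessage

    async def ask(llm) -> str:
        message = LLMUserMessage(
            content="What is in this picture?",
            image_urls=["https://example.com/cat.jpg"],
        )
        text = ""
        async for chunk in llm._stream_user_message_to_llm(message):
            text += chunk.content
            # Usage now arrives on the final chunk alongside any media URLs.
            if chunk.is_complete and chunk.usage:
                print("total tokens:", chunk.usage.total_tokens)
        return text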
autobyteus/llm/api/bedrock_llm.py

@@ -9,10 +9,10 @@ from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.messages import MessageRole, Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage

 class BedrockLLM(BaseLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        # Provide defaults if not specified
         if model is None:
             model = LLMModel.BEDROCK_CLAUDE_3_5_SONNET_API
         if llm_config is None:
@@ -43,14 +43,17 @@ class BedrockLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Failed to initialize Bedrock client: {str(e)}")

-    async def _send_user_message_to_llm(self, user_message: str, **kwargs) -> CompleteResponse:
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)

+        # NOTE: This implementation does not yet support multimodal inputs for Bedrock.
+        # It will only send the text content.
+
         request_body = json.dumps({
             "anthropic_version": "bedrock-2023-05-31",
             "max_tokens": 1000,
             "temperature": 0,
-            "messages": [msg.to_dict() for msg in self.messages],
+            "messages": [msg.to_dict() for msg in self.messages if msg.role != MessageRole.SYSTEM],
             "system": self.system_message if self.system_message else ""
         })

@@ -79,6 +82,11 @@ class BedrockLLM(BaseLLM):
             raise ValueError(f"Bedrock API error: {error_code} - {error_message}")
         except Exception as e:
             raise ValueError(f"Error in Bedrock API call: {str(e)}")
-
+
+    async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
+        # Placeholder for future implementation
+        response = await self._send_user_message_to_llm(user_message, **kwargs)
+        yield ChunkResponse(content=response.content, is_complete=True, usage=response.usage)
+
     async def cleanup(self):
-        super().cleanup()
+        await super().cleanup()