autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/context/agent_runtime_state.py +7 -1
  3. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  4. autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
  5. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  6. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
  7. autobyteus/agent/message/__init__.py +7 -5
  8. autobyteus/agent/message/agent_input_user_message.py +6 -16
  9. autobyteus/agent/message/context_file.py +24 -24
  10. autobyteus/agent/message/context_file_type.py +29 -8
  11. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  12. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  13. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  14. autobyteus/agent/tool_invocation.py +27 -2
  15. autobyteus/agent_team/agent_team_builder.py +22 -1
  16. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  17. autobyteus/agent_team/context/agent_team_config.py +1 -0
  18. autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
  19. autobyteus/llm/api/autobyteus_llm.py +33 -33
  20. autobyteus/llm/api/bedrock_llm.py +13 -5
  21. autobyteus/llm/api/claude_llm.py +13 -27
  22. autobyteus/llm/api/gemini_llm.py +108 -42
  23. autobyteus/llm/api/groq_llm.py +4 -3
  24. autobyteus/llm/api/mistral_llm.py +97 -51
  25. autobyteus/llm/api/nvidia_llm.py +6 -5
  26. autobyteus/llm/api/ollama_llm.py +37 -12
  27. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  28. autobyteus/llm/autobyteus_provider.py +1 -1
  29. autobyteus/llm/base_llm.py +42 -139
  30. autobyteus/llm/extensions/base_extension.py +6 -6
  31. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  32. autobyteus/llm/llm_factory.py +131 -61
  33. autobyteus/llm/ollama_provider_resolver.py +1 -0
  34. autobyteus/llm/providers.py +1 -0
  35. autobyteus/llm/token_counter/token_counter_factory.py +3 -1
  36. autobyteus/llm/user_message.py +43 -35
  37. autobyteus/llm/utils/llm_config.py +34 -18
  38. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  39. autobyteus/llm/utils/messages.py +32 -25
  40. autobyteus/llm/utils/response_types.py +9 -3
  41. autobyteus/llm/utils/token_usage.py +6 -5
  42. autobyteus/multimedia/__init__.py +31 -0
  43. autobyteus/multimedia/audio/__init__.py +11 -0
  44. autobyteus/multimedia/audio/api/__init__.py +4 -0
  45. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  46. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  47. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  48. autobyteus/multimedia/audio/audio_model.py +97 -0
  49. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  50. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  51. autobyteus/multimedia/image/__init__.py +11 -0
  52. autobyteus/multimedia/image/api/__init__.py +9 -0
  53. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  54. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  55. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  56. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  57. autobyteus/multimedia/image/base_image_client.py +67 -0
  58. autobyteus/multimedia/image/image_client_factory.py +118 -0
  59. autobyteus/multimedia/image/image_model.py +97 -0
  60. autobyteus/multimedia/providers.py +5 -0
  61. autobyteus/multimedia/runtimes.py +8 -0
  62. autobyteus/multimedia/utils/__init__.py +10 -0
  63. autobyteus/multimedia/utils/api_utils.py +19 -0
  64. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  65. autobyteus/multimedia/utils/response_types.py +13 -0
  66. autobyteus/task_management/tools/publish_task_plan.py +4 -16
  67. autobyteus/task_management/tools/update_task_status.py +4 -19
  68. autobyteus/tools/__init__.py +5 -4
  69. autobyteus/tools/base_tool.py +98 -29
  70. autobyteus/tools/browser/standalone/__init__.py +0 -1
  71. autobyteus/tools/google_search.py +149 -0
  72. autobyteus/tools/mcp/schema_mapper.py +29 -71
  73. autobyteus/tools/multimedia/__init__.py +8 -0
  74. autobyteus/tools/multimedia/audio_tools.py +116 -0
  75. autobyteus/tools/multimedia/image_tools.py +186 -0
  76. autobyteus/tools/parameter_schema.py +82 -89
  77. autobyteus/tools/pydantic_schema_converter.py +81 -0
  78. autobyteus/tools/tool_category.py +1 -0
  79. autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
  80. autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
  81. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
  82. autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
  83. autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
  84. autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
  85. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
  86. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  87. autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
  88. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  89. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  90. autobyteus-1.1.7.dist-info/METADATA +204 -0
  91. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
  92. examples/run_browser_agent.py +1 -1
  93. examples/run_google_slides_agent.py +2 -2
  94. examples/run_mcp_google_slides_client.py +1 -1
  95. examples/run_sqlite_agent.py +1 -1
  96. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  97. autobyteus/tools/ask_user_input.py +0 -40
  98. autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
  99. autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
  100. autobyteus-1.1.5.dist-info/METADATA +0 -161
  101. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
  102. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
  103. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0

autobyteus/llm/token_counter/token_counter_factory.py

@@ -25,7 +25,7 @@ def get_token_counter(model: LLMModel, llm: 'BaseLLM') -> BaseTokenCounter:
     if model.provider == LLMProvider.OPENAI:
         return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.ANTHROPIC:
-        return ClaudeTokenCounter(model, llm)
+        return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.MISTRAL:
         return MistralTokenCounter(model, llm)
     elif model.provider == LLMProvider.DEEPSEEK:
@@ -34,6 +34,8 @@ def get_token_counter(model: LLMModel, llm: 'BaseLLM') -> BaseTokenCounter:
         return DeepSeekTokenCounter(model, llm)
     elif model.provider == LLMProvider.KIMI:
         return KimiTokenCounter(model, llm)
+    elif model.provider == LLMProvider.QWEN:
+        return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.OLLAMA:
         return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.LMSTUDIO:

autobyteus/llm/user_message.py

@@ -7,53 +7,65 @@ logger = logging.getLogger(__name__)
 class LLMUserMessage:
     """
     Represents a user message formatted specifically for input to an LLM.
-    It includes content and optionally image URLs.
-    This structure is typically used when constructing prompts for multimodal LLMs
-    or when sending a "user" role message in a conversation.
+    It includes content and optionally URLs for various media types.
+    This structure is typically used when constructing prompts for multimodal LLMs.
     """
     def __init__(self,
                  content: str,
-                 image_urls: Optional[List[str]] = None):
+                 image_urls: Optional[List[str]] = None,
+                 audio_urls: Optional[List[str]] = None,
+                 video_urls: Optional[List[str]] = None):
         """
         Initializes an LLMUserMessage.

         Args:
             content: The textual content of the user's message.
-            image_urls: An optional list of URLs or local paths to images
-                        to be included with the message for the LLM.
+            image_urls: An optional list of URLs or local paths to images.
+            audio_urls: An optional list of URLs or local paths to audio files.
+            video_urls: An optional list of URLs or local paths to video files.
         """
-        if not isinstance(content, str):
-            # Allow empty string for content, as images might be the only input.
-            # But content must still be a string type.
-            pass # Validation can be more strict if empty content is disallowed with no images
-
-        if image_urls is None:
-            image_urls = [] # Default to empty list for easier processing
+        self.content: str = content
+        self.image_urls: List[str] = image_urls or []
+        self.audio_urls: List[str] = audio_urls or []
+        self.video_urls: List[str] = video_urls or []

-        if not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
+        # --- Validation ---
+        if not isinstance(self.content, str):
+            raise TypeError("LLMUserMessage 'content' must be a string.")
+        if not (isinstance(self.image_urls, list) and all(isinstance(url, str) for url in self.image_urls)):
             raise TypeError("LLMUserMessage 'image_urls' must be a list of strings.")
-
-        if not content and not image_urls:
-            raise ValueError("LLMUserMessage must have either content or image_urls or both.")
+        if not (isinstance(self.audio_urls, list) and all(isinstance(url, str) for url in self.audio_urls)):
+            raise TypeError("LLMUserMessage 'audio_urls' must be a list of strings.")
+        if not (isinstance(self.video_urls, list) and all(isinstance(url, str) for url in self.video_urls)):
+            raise TypeError("LLMUserMessage 'video_urls' must be a list of strings.")

-        self.content: str = content
-        self.image_urls: List[str] = image_urls
+        if not self.content and not self.image_urls and not self.audio_urls and not self.video_urls:
+            raise ValueError("LLMUserMessage must have either content or at least one media URL.")

-        logger.debug(f"LLMUserMessage created. Content: '{content[:50]}...', Image URLs: {image_urls}")
+        logger.debug(f"LLMUserMessage created. Content: '{self.content[:50]}...', "
+                     f"Images: {len(self.image_urls)}, Audio: {len(self.audio_urls)}, Video: {len(self.video_urls)}")

     def __repr__(self) -> str:
-        image_urls_repr = f", image_urls={self.image_urls}" if self.image_urls else ""
-        return f"LLMUserMessage(content='{self.content[:100]}...'{image_urls_repr})"
+        parts = [f"content='{self.content[:100]}...'"]
+        if self.image_urls:
+            parts.append(f"image_urls={self.image_urls}")
+        if self.audio_urls:
+            parts.append(f"audio_urls={self.audio_urls}")
+        if self.video_urls:
+            parts.append(f"video_urls={self.video_urls}")
+        return f"LLMUserMessage({', '.join(parts)})"

     def to_dict(self) -> Dict[str, Any]:
         """
-        Serializes the LLMUserMessage to a dictionary. This method might be less used
-        now that BaseLLM._add_user_message handles the conversion to the Message format.
-        Kept for potential direct use or testing.
+        Serializes the LLMUserMessage to a dictionary.
         """
         data = {"content": self.content}
         if self.image_urls:
             data["image_urls"] = self.image_urls
+        if self.audio_urls:
+            data["audio_urls"] = self.audio_urls
+        if self.video_urls:
+            data["video_urls"] = self.video_urls
         return data

     @classmethod
@@ -61,13 +73,9 @@ class LLMUserMessage:
         """
         Deserializes an LLMUserMessage from a dictionary.
         """
-        content = data.get("content", "") # Default to empty string if not present
-        image_urls = data.get("image_urls") # Expects a list or None
-
-        # Basic validation, more can be added if needed
-        if not isinstance(content, str):
-            raise ValueError("LLMUserMessage 'content' in dictionary must be a string.")
-        if image_urls is not None and not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
-            raise ValueError("LLMUserMessage 'image_urls' in dictionary must be a list of strings if provided.")
-
-        return cls(content=content, image_urls=image_urls)
+        return cls(
+            content=data.get("content", ""),
+            image_urls=data.get("image_urls"),
+            audio_urls=data.get("audio_urls"),
+            video_urls=data.get("video_urls")
+        )
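
Note: a minimal usage sketch of the expanded multimodal constructor above (illustrative only, not part of the diff; the import path follows the file location autobyteus/llm/user_message.py, and the example URLs and paths are made up):

    from autobyteus.llm.user_message import LLMUserMessage

    # Text plus one image and one audio clip; any combination is accepted, but at
    # least one of content / image_urls / audio_urls / video_urls must be non-empty,
    # otherwise the constructor raises ValueError.
    msg = LLMUserMessage(
        content="Describe what you see and hear.",
        image_urls=["/tmp/frame_001.png"],
        audio_urls=["https://example.com/clip.wav"],
    )

    payload = msg.to_dict()
    # Only non-empty media lists are serialized:
    # {'content': ..., 'image_urls': [...], 'audio_urls': [...]}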

autobyteus/llm/utils/llm_config.py

@@ -55,6 +55,7 @@ class LLMConfig:
     frequency_penalty: Optional[float] = None
     presence_penalty: Optional[float] = None
     stop_sequences: Optional[List] = None
+    uses_max_completion_tokens: bool = False
     extra_params: Dict[str, Any] = field(default_factory=dict)
     pricing_config: TokenPricingConfig = field(default_factory=TokenPricingConfig)

@@ -102,17 +103,28 @@ class LLMConfig:
         data_copy = data.copy()
         pricing_config_data = data_copy.pop('pricing_config', {})

+        # Create a new dictionary for known fields to avoid passing them in twice
+        known_fields = {
+            'rate_limit', 'token_limit', 'system_message', 'temperature',
+            'max_tokens', 'top_p', 'frequency_penalty', 'presence_penalty',
+            'stop_sequences', 'uses_max_completion_tokens', 'extra_params',
+            'pricing_config'
+        }
+
+        init_kwargs = {k: v for k, v in data_copy.items() if k in known_fields}
+
         config = cls(
-            rate_limit=data_copy.get('rate_limit'),
-            token_limit=data_copy.get('token_limit'),
-            system_message=data_copy.get('system_message', "You are a helpful assistant."),
-            temperature=data_copy.get('temperature', 0.7),
-            max_tokens=data_copy.get('max_tokens'),
-            top_p=data_copy.get('top_p'),
-            frequency_penalty=data_copy.get('frequency_penalty'),
-            presence_penalty=data_copy.get('presence_penalty'),
-            stop_sequences=data_copy.get('stop_sequences'),
-            extra_params=data_copy.get('extra_params', {}),
+            rate_limit=init_kwargs.get('rate_limit'),
+            token_limit=init_kwargs.get('token_limit'),
+            system_message=init_kwargs.get('system_message', "You are a helpful assistant."),
+            temperature=init_kwargs.get('temperature', 0.7),
+            max_tokens=init_kwargs.get('max_tokens'),
+            top_p=init_kwargs.get('top_p'),
+            frequency_penalty=init_kwargs.get('frequency_penalty'),
+            presence_penalty=init_kwargs.get('presence_penalty'),
+            stop_sequences=init_kwargs.get('stop_sequences'),
+            uses_max_completion_tokens=init_kwargs.get('uses_max_completion_tokens', False),
+            extra_params=init_kwargs.get('extra_params', {}),
             pricing_config=pricing_config_data
         )
         return config
@@ -162,26 +174,30 @@ class LLMConfig:
         for f_info in fields(override_config):
             override_value = getattr(override_config, f_info.name)

+            # Special handling for booleans where we want to merge if it's not the default
+            # For `uses_max_completion_tokens`, the default is False, so `if override_value:` is fine
+            is_boolean_field = f_info.type == bool
+
+            # Standard check for None, but also merge if it's a non-default boolean
             if override_value is not None:
-                if f_info.name == 'pricing_config':
-                    # Ensure self.pricing_config is an object (should be by __post_init__)
+                # For uses_max_completion_tokens, `False` is a valid override value, but `None` is not
+                if is_boolean_field and override_value is False and getattr(self, f_info.name) is True:
+                    setattr(self, f_info.name, override_value)
+                elif f_info.name == 'pricing_config':
                     if not isinstance(self.pricing_config, TokenPricingConfig):
-                        self.pricing_config = TokenPricingConfig() # Should not be needed
+                        self.pricing_config = TokenPricingConfig()

-                    # override_value here is override_config.pricing_config, which is TokenPricingConfig
                     if isinstance(override_value, TokenPricingConfig):
                         self.pricing_config.merge_with(override_value)
-                    elif isinstance(override_value, dict): # Should not happen if override_config is LLMConfig
+                    elif isinstance(override_value, dict):
                         self.pricing_config.merge_with(TokenPricingConfig.from_dict(override_value))
                     else:
                         logger.warning(f"Skipping merge for pricing_config due to unexpected override type: {type(override_value)}")
                 elif f_info.name == 'extra_params':
-                    # For extra_params (dict), merge dictionaries
                     if isinstance(override_value, dict) and isinstance(self.extra_params, dict):
                         self.extra_params.update(override_value)
                     else:
-                        setattr(self, f_info.name, override_value) # Fallback to direct set if types mismatch
+                        setattr(self, f_info.name, override_value)
             else:
                 setattr(self, f_info.name, override_value)
         logger.debug(f"LLMConfig merged. Current state after merge: rate_limit={self.rate_limit}, temp={self.temperature}, system_message='{self.system_message}'")
-
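
Note: a hypothetical sketch of how the new uses_max_completion_tokens flag flows through the dictionary loader in the second hunk above. The classmethod's def line is outside the hunk, so the name from_dict is assumed here; the input values are made up:

    from autobyteus.llm.utils.llm_config import LLMConfig

    raw = {
        "temperature": 0.2,
        "max_tokens": 4096,
        "uses_max_completion_tokens": True,   # new in 1.1.7; passed through init_kwargs above
        "unknown_key": "ignored",             # filtered out by the known_fields set
    }

    config = LLMConfig.from_dict(raw)         # method name assumed, see note above
    assert config.uses_max_completion_tokens is True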

autobyteus/llm/utils/media_payload_formatter.py (new file)

@@ -0,0 +1,99 @@
+import base64
+import mimetypes
+from typing import Dict, Union
+from pathlib import Path
+import httpx
+import logging
+
+logger = logging.getLogger(__name__)
+
+# FIX: Instantiate the client with verify=False to allow for self-signed certificates
+# in local development environments, which is a common use case.
+_http_client = httpx.AsyncClient(verify=False)
+
+# Add a prominent security warning to inform developers about the disabled SSL verification.
+logger.warning(
+    "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
+    "SECURITY WARNING: SSL certificate verification is DISABLED for the image "
+    "downloader (httpx client in media_payload_formatter.py).\n"
+    "This is intended for development and testing with local servers using "
+    "self-signed certificates. In a production environment, this could expose "
+    "the system to Man-in-the-Middle (MitM) attacks when downloading images from "
+    "the public internet.\n"
+    "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
+)
+
+
+def get_mime_type(file_path: str) -> str:
+    """Determine MIME type of file."""
+    mime_type, _ = mimetypes.guess_type(file_path)
+    if not mime_type or not mime_type.startswith('image/'):
+        return 'image/jpeg' # default fallback
+    return mime_type
+
+
+def is_base64(s: str) -> bool:
+    """Check if a string is a valid base64 encoded string."""
+    try:
+        # Check if the string has valid base64 characters and padding
+        if not isinstance(s, str) or len(s) % 4 != 0:
+            return False
+        base64.b64decode(s, validate=True)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
+def is_valid_image_path(path: str) -> bool:
+    """Check if path exists and has a valid image extension."""
+    valid_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
+    try:
+        file_path = Path(path)
+        return file_path.is_file() and file_path.suffix.lower() in valid_extensions
+    except (TypeError, ValueError):
+        return False
+
+
+def create_data_uri(mime_type: str, base64_data: str) -> Dict:
+    """Create properly structured data URI object for API."""
+    return {
+        "type": "image_url",
+        "image_url": {
+            "url": f"data:{mime_type};base64,{base64_data}"
+        }
+    }
+
+def file_to_base64(path: str) -> str:
+    """Reads an image file from a local path and returns it as a base64 encoded string."""
+    try:
+        with open(path, "rb") as img_file:
+            return base64.b64encode(img_file.read()).decode("utf-8")
+    except Exception as e:
+        logger.error(f"Failed to read and encode image file at {path}: {e}")
+        raise
+
+async def url_to_base64(url: str) -> str:
+    """Downloads an image from a URL and returns it as a base64 encoded string."""
+    try:
+        response = await _http_client.get(url)
+        response.raise_for_status()
+        return base64.b64encode(response.content).decode("utf-8")
+    except httpx.HTTPError as e:
+        logger.error(f"Failed to download image from URL {url}: {e}")
+        raise
+
+async def image_source_to_base64(image_source: str) -> str:
+    """
+    Orchestrator function that converts an image source (file path, URL, or existing base64)
+    into a base64 encoded string by delegating to specialized functions.
+    """
+    if is_valid_image_path(image_source):
+        return file_to_base64(image_source)
+
+    if image_source.startswith(("http://", "https://")):
+        return await url_to_base64(image_source)
+
+    if is_base64(image_source):
+        return image_source
+
+    raise ValueError(f"Invalid image source: not a valid file path, URL, or base64 string.")
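
Note: an illustrative use of the new helpers above (functions and signatures are taken from the file itself; the local path is made up). image_source_to_base64 is async because remote URLs are fetched through the module-level httpx.AsyncClient flagged by the warning above:

    import asyncio
    from autobyteus.llm.utils.media_payload_formatter import (
        create_data_uri, get_mime_type, image_source_to_base64,
    )

    async def main() -> None:
        source = "/tmp/photo.png"                   # a URL or raw base64 string also works
        b64 = await image_source_to_base64(source)  # dispatches to file / URL / base64 handling
        payload = create_data_uri(get_mime_type(source), b64)
        # payload == {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}

    asyncio.run(main())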

autobyteus/llm/utils/messages.py

@@ -7,34 +7,41 @@ class MessageRole(Enum):
     ASSISTANT = "assistant"

 class Message:
-    def __init__(self, role: MessageRole, content: Union[str, List[Dict]], reasoning_content: Optional[str] = None):
+    def __init__(self,
+                 role: MessageRole,
+                 content: Optional[str] = None,
+                 reasoning_content: Optional[str] = None,
+                 image_urls: Optional[List[str]] = None,
+                 audio_urls: Optional[List[str]] = None,
+                 video_urls: Optional[List[str]] = None):
         """
-        Initializes a Message.
-
+        Initializes a rich Message object for conversation history.
+
         Args:
-            role (MessageRole): The role of the message.
-            content (Union[str, List[Dict]]): The content of the message.
-            reasoning_content (Optional[str]): Optional reasoning content for reasoning models.
+            role: The role of the message originator.
+            content: The textual content of the message.
+            reasoning_content: Optional reasoning/thought process from an assistant.
+            image_urls: Optional list of image URIs.
+            audio_urls: Optional list of audio URIs.
+            video_urls: Optional list of video URIs.
         """
         self.role = role
         self.content = content
-        self.reasoning_content = reasoning_content # Optional field for reasoning content
-
-    def to_dict(self) -> Dict[str, Union[str, List[Dict]]]:
-        result: Dict[str, Union[str, List[Dict]]] = {"role": self.role.value, "content": self.content}
-        if self.reasoning_content:
-            result["reasoning_content"] = self.reasoning_content
-        return result
+        self.reasoning_content = reasoning_content
+        self.image_urls = image_urls or []
+        self.audio_urls = audio_urls or []
+        self.video_urls = video_urls or []

-    def to_mistral_message(self):
-        if self.role == MessageRole.USER:
-            from mistralai import UserMessage
-            return UserMessage(content=self.content)
-        elif self.role == MessageRole.ASSISTANT:
-            from mistralai import AssistantMessage
-            return AssistantMessage(content=self.content)
-        elif self.role == MessageRole.SYSTEM:
-            from mistralai import SystemMessage
-            return SystemMessage(content=self.content)
-        else:
-            raise ValueError(f"Unsupported message role: {self.role}")
+    def to_dict(self) -> Dict[str, Union[str, List[str], None]]:
+        """
+        Returns a simple dictionary representation of the Message object.
+        This is for internal use and does not format for any specific API.
+        """
+        return {
+            "role": self.role.value,
+            "content": self.content,
+            "reasoning_content": self.reasoning_content,
+            "image_urls": self.image_urls,
+            "audio_urls": self.audio_urls,
+            "video_urls": self.video_urls,
+        }
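
Note: a small sketch of the reworked Message container (MessageRole.USER appears in the removed to_mistral_message branches, so it is assumed to still be an enum member; the example values are made up):

    from autobyteus.llm.utils.messages import Message, MessageRole

    m = Message(
        role=MessageRole.USER,
        content="What is in this picture?",
        image_urls=["https://example.com/cat.png"],
    )

    # to_dict() is now a plain, provider-agnostic serialization; the old
    # to_mistral_message() helper was removed in this release.
    print(m.to_dict()["image_urls"])   # ['https://example.com/cat.png']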

autobyteus/llm/utils/response_types.py

@@ -1,5 +1,5 @@
-from dataclasses import dataclass
-from typing import Optional
+from dataclasses import dataclass, field
+from typing import Optional, List
 from autobyteus.llm.utils.token_usage import TokenUsage

 @dataclass
@@ -7,6 +7,9 @@ class CompleteResponse:
     content: str
     reasoning: Optional[str] = None
     usage: Optional[TokenUsage] = None
+    image_urls: List[str] = field(default_factory=list)
+    audio_urls: List[str] = field(default_factory=list)
+    video_urls: List[str] = field(default_factory=list)

     @classmethod
     def from_content(cls, content: str) -> 'CompleteResponse':
@@ -17,4 +20,7 @@ class ChunkResponse:
     content: str # The actual content/text of the chunk
     reasoning: Optional[str] = None
     is_complete: bool = False # Indicates if this is the final chunk
-    usage: Optional[TokenUsage] = None # Token usage stats, typically available in final chunk
+    usage: Optional[TokenUsage] = None # Token usage stats, typically available in final chunk
+    image_urls: List[str] = field(default_factory=list)
+    audio_urls: List[str] = field(default_factory=list)
+    video_urls: List[str] = field(default_factory=list)
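
Note: illustrative construction of the extended response dataclasses above (fields as shown; values are made up):

    from autobyteus.llm.utils.response_types import ChunkResponse, CompleteResponse

    # A complete response that carries generated media alongside the text.
    resp = CompleteResponse(
        content="Here is the generated image.",
        image_urls=["https://example.com/generated/img_0.png"],
    )

    # Streaming chunks default to empty media lists, so existing callers keep working.
    chunk = ChunkResponse(content="partial text", is_complete=False)
    assert chunk.image_urls == []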

autobyteus/llm/utils/token_usage.py

@@ -1,8 +1,8 @@
 # file: autobyteus/autobyteus/llm/utils/token_usage.py
 from typing import Optional
-from pydantic import BaseModel # MODIFIED: Import BaseModel
+from pydantic import BaseModel, ConfigDict # MODIFIED: Import ConfigDict

-# MODIFIED: Change from dataclass to Pydantic BaseModel
+# MODIFIED: Change from dataclass to Pydantic BaseModel and use model_config
 class TokenUsage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
@@ -11,6 +11,7 @@ class TokenUsage(BaseModel):
     completion_cost: Optional[float] = None
     total_cost: Optional[float] = None

-    class Config:
-        populate_by_name = True # If you use aliases, or for general Pydantic v2 compatibility
-        # or model_config = ConfigDict(populate_by_name=True) for Pydantic v2
+    # FIX: Use model_config with ConfigDict for Pydantic v2 compatibility
+    model_config = ConfigDict(
+        populate_by_name=True,
+    )

autobyteus/multimedia/__init__.py (new file)

@@ -0,0 +1,31 @@
+from .providers import MultimediaProvider
+from .runtimes import MultimediaRuntime
+from .utils import *
+from .image import *
+from .audio import *
+
+
+__all__ = [
+    # Factories
+    "image_client_factory",
+    "ImageClientFactory",
+    "audio_client_factory",
+    "AudioClientFactory",
+
+    # Models
+    "ImageModel",
+    "AudioModel",
+
+    # Base Clients
+    "BaseImageClient",
+    "BaseAudioClient",
+
+    # Enums
+    "MultimediaProvider",
+    "MultimediaRuntime",
+
+    # Response Types and Config
+    "ImageGenerationResponse",
+    "SpeechGenerationResponse",
+    "MultimediaConfig",
+]

autobyteus/multimedia/audio/__init__.py (new file)

@@ -0,0 +1,11 @@
+from .audio_client_factory import audio_client_factory, AudioClientFactory
+from .audio_model import AudioModel
+from .base_audio_client import BaseAudioClient
+from .api import *
+
+__all__ = [
+    "audio_client_factory",
+    "AudioClientFactory",
+    "AudioModel",
+    "BaseAudioClient",
+]

autobyteus/multimedia/audio/api/__init__.py (new file)

@@ -0,0 +1,4 @@
+from .gemini_audio_client import GeminiAudioClient
+from .autobyteus_audio_client import AutobyteusAudioClient
+
+__all__ = ["GeminiAudioClient", "AutobyteusAudioClient"]

autobyteus/multimedia/audio/api/autobyteus_audio_client.py (new file)

@@ -0,0 +1,59 @@
+import logging
+from typing import Optional, List, Dict, Any, TYPE_CHECKING
+from autobyteus_llm_client import AutobyteusClient
+from autobyteus.multimedia.audio.base_audio_client import BaseAudioClient
+from autobyteus.multimedia.utils.response_types import SpeechGenerationResponse
+
+if TYPE_CHECKING:
+    from autobyteus.multimedia.audio.audio_model import AudioModel
+    from autobyteus.multimedia.utils.multimedia_config import MultimediaConfig
+
+logger = logging.getLogger(__name__)
+
+class AutobyteusAudioClient(BaseAudioClient):
+    """
+    An audio client that connects to an Autobyteus server instance for audio tasks.
+    """
+
+    def __init__(self, model: "AudioModel", config: "MultimediaConfig"):
+        super().__init__(model, config)
+        if not model.host_url:
+            raise ValueError("AutobyteusAudioClient requires a host_url in its AudioModel.")
+
+        self.autobyteus_client = AutobyteusClient(server_url=model.host_url)
+        logger.info(f"AutobyteusAudioClient initialized for model '{model.name}' on host '{model.host_url}'.")
+
+    async def generate_speech(
+        self,
+        prompt: str,
+        generation_config: Optional[Dict[str, Any]] = None
+    ) -> SpeechGenerationResponse:
+        """
+        Generates speech by calling the generate_speech endpoint on the remote Autobyteus server.
+        """
+        try:
+            logger.info(f"Sending speech generation request for model '{self.model.name}' to {self.model.host_url}")
+
+            model_name_for_server = self.model.name
+
+            response_data = await self.autobyteus_client.generate_speech(
+                model_name=model_name_for_server,
+                prompt=prompt,
+                generation_config=generation_config
+            )
+
+            audio_urls = response_data.get("audio_urls", [])
+            if not audio_urls:
+                raise ValueError("Remote Autobyteus server did not return any audio URLs.")
+
+            return SpeechGenerationResponse(audio_urls=audio_urls)
+
+        except Exception as e:
+            logger.error(f"Error calling Autobyteus server for speech generation: {e}", exc_info=True)
+            raise
+
+    async def cleanup(self):
+        """Closes the underlying AutobyteusClient."""
+        if self.autobyteus_client:
+            await self.autobyteus_client.close()
+            logger.debug("AutobyteusAudioClient cleaned up.")