autobyteus-1.1.5-py3-none-any.whl → autobyteus-1.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  3. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  4. autobyteus/agent/message/__init__.py +7 -5
  5. autobyteus/agent/message/agent_input_user_message.py +6 -16
  6. autobyteus/agent/message/context_file.py +24 -24
  7. autobyteus/agent/message/context_file_type.py +29 -8
  8. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  9. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  10. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  11. autobyteus/agent/tool_invocation.py +2 -1
  12. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  13. autobyteus/agent_team/context/agent_team_config.py +1 -0
  14. autobyteus/llm/api/autobyteus_llm.py +33 -33
  15. autobyteus/llm/api/bedrock_llm.py +13 -5
  16. autobyteus/llm/api/claude_llm.py +13 -27
  17. autobyteus/llm/api/gemini_llm.py +108 -42
  18. autobyteus/llm/api/groq_llm.py +4 -3
  19. autobyteus/llm/api/mistral_llm.py +97 -51
  20. autobyteus/llm/api/nvidia_llm.py +6 -5
  21. autobyteus/llm/api/ollama_llm.py +37 -12
  22. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  23. autobyteus/llm/autobyteus_provider.py +1 -1
  24. autobyteus/llm/base_llm.py +42 -139
  25. autobyteus/llm/extensions/base_extension.py +6 -6
  26. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  27. autobyteus/llm/llm_factory.py +106 -4
  28. autobyteus/llm/token_counter/token_counter_factory.py +1 -1
  29. autobyteus/llm/user_message.py +43 -35
  30. autobyteus/llm/utils/llm_config.py +34 -18
  31. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  32. autobyteus/llm/utils/messages.py +32 -25
  33. autobyteus/llm/utils/response_types.py +9 -3
  34. autobyteus/llm/utils/token_usage.py +6 -5
  35. autobyteus/multimedia/__init__.py +31 -0
  36. autobyteus/multimedia/audio/__init__.py +11 -0
  37. autobyteus/multimedia/audio/api/__init__.py +4 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  40. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  41. autobyteus/multimedia/audio/audio_model.py +96 -0
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  43. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  44. autobyteus/multimedia/image/__init__.py +11 -0
  45. autobyteus/multimedia/image/api/__init__.py +9 -0
  46. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  47. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  48. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  49. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  50. autobyteus/multimedia/image/base_image_client.py +67 -0
  51. autobyteus/multimedia/image/image_client_factory.py +118 -0
  52. autobyteus/multimedia/image/image_model.py +96 -0
  53. autobyteus/multimedia/providers.py +5 -0
  54. autobyteus/multimedia/runtimes.py +8 -0
  55. autobyteus/multimedia/utils/__init__.py +10 -0
  56. autobyteus/multimedia/utils/api_utils.py +19 -0
  57. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  58. autobyteus/multimedia/utils/response_types.py +13 -0
  59. autobyteus/tools/__init__.py +3 -0
  60. autobyteus/tools/multimedia/__init__.py +8 -0
  61. autobyteus/tools/multimedia/audio_tools.py +116 -0
  62. autobyteus/tools/multimedia/image_tools.py +186 -0
  63. autobyteus/tools/tool_category.py +1 -0
  64. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  65. autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
  66. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  67. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  68. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
  69. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
  70. examples/run_browser_agent.py +1 -1
  71. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  72. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
  73. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
  74. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0

autobyteus/llm/base_llm.py

@@ -15,15 +15,6 @@ class BaseLLM(ABC):
     DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant"

     def __init__(self, model: LLMModel, llm_config: LLMConfig):
-        """
-        Base class for all LLMs. Provides core messaging functionality
-        and extension support.
-
-        Args:
-            model (LLMModel): An LLMModel enum value.
-            llm_config (LLMConfig): Configuration for the LLM including system message,
-                rate limits, token limits, etc.
-        """
         if not isinstance(model, LLMModel):
             raise TypeError(f"Expected LLMModel, got {type(model)}")
         if not isinstance(llm_config, LLMConfig):
@@ -33,100 +24,65 @@ class BaseLLM(ABC):
         self.config = llm_config
         self._extension_registry = ExtensionRegistry()

-        # Register TokenUsageTrackingExtension by default
         self._token_usage_extension: TokenUsageTrackingExtension = self.register_extension(TokenUsageTrackingExtension)

         self.messages: List[Message] = []
-        # Use system_message from config, with fallback to default if not provided
         self.system_message = self.config.system_message or self.DEFAULT_SYSTEM_MESSAGE
         self.add_system_message(self.system_message)

     @property
     def latest_token_usage(self):
-        """
-        Get the token usage from the last interaction with the LLM.
-
-        Returns:
-            The token usage information from the last interaction
-        """
         return self._token_usage_extension.latest_token_usage

     def register_extension(self, extension_class: Type[LLMExtension]) -> LLMExtension:
-        """
-        Register a new extension.
-
-        Args:
-            extension_class: The extension class to instantiate and register
-
-        Returns:
-            LLMExtension: The instantiated extension
-        """
         extension = extension_class(self)
         self._extension_registry.register(extension)
         return extension

     def unregister_extension(self, extension: LLMExtension) -> None:
-        """
-        Unregister an existing extension.
-
-        Args:
-            extension (LLMExtension): The extension to unregister
-        """
         self._extension_registry.unregister(extension)

     def get_extension(self, extension_class: Type[LLMExtension]) -> Optional[LLMExtension]:
-        """
-        Get a registered extension by its class.
-
-        Args:
-            extension_class: The class of the extension to retrieve
-
-        Returns:
-            Optional[LLMExtension]: The extension instance if found, None otherwise
-        """
         return self._extension_registry.get(extension_class)

     def add_system_message(self, message: str):
-        """
-        Add a system message to the conversation history.
+        self.messages.append(Message(MessageRole.SYSTEM, content=message))

-        Args:
-            message (str): The system message content.
+    def add_user_message(self, user_message: LLMUserMessage):
         """
-        self.messages.append(Message(MessageRole.SYSTEM, message))
-
-    def add_user_message(self, user_message: Union[str, List[Dict]]):
+        Adds a user message to history, converting from LLMUserMessage to Message.
         """
-        Add a user message to the conversation history.
-
-        Args:
-            user_message (Union[str, List[Dict]]): The user message content. Can be a simple string
-                or a list of dictionaries for multimodal content.
-        """
-        msg = Message(MessageRole.USER, user_message)
+        msg = Message(
+            role=MessageRole.USER,
+            content=user_message.content,
+            image_urls=user_message.image_urls,
+            audio_urls=user_message.audio_urls,
+            video_urls=user_message.video_urls
+        )
         self.messages.append(msg)
         self._trigger_on_user_message_added(msg)

-    def add_assistant_message(self, message: str, reasoning_content: Optional[str] = None):
-        """
-        Add an assistant message to the conversation history.
-
-        Args:
-            message (str): The assistant message content.
-            reasoning_content (Optional[str]): Optional reasoning content to attach.
-        """
-        msg = Message(MessageRole.ASSISTANT, message, reasoning_content=reasoning_content)
+    def add_assistant_message(self,
+                              content: Optional[str],
+                              reasoning_content: Optional[str] = None,
+                              image_urls: Optional[List[str]] = None,
+                              audio_urls: Optional[List[str]] = None,
+                              video_urls: Optional[List[str]] = None):
+        """
+        Adds a multimodal assistant message to the conversation history.
+        """
+        msg = Message(
+            role=MessageRole.ASSISTANT,
+            content=content,
+            reasoning_content=reasoning_content,
+            image_urls=image_urls,
+            audio_urls=audio_urls,
+            video_urls=video_urls
+        )
         self.messages.append(msg)
         self._trigger_on_assistant_message_added(msg)

     def configure_system_prompt(self, new_system_prompt: str):
-        """
-        Updates the system prompt for the LLM instance after initialization.
-        This will replace the existing system message in the conversation history.
-
-        Args:
-            new_system_prompt (str): The new system prompt content.
-        """
         if not new_system_prompt or not isinstance(new_system_prompt, str):
             logging.warning("Attempted to configure an empty or invalid system prompt. No changes made.")
             return
@@ -134,7 +90,6 @@ class BaseLLM(ABC):
         self.system_message = new_system_prompt
         self.config.system_message = new_system_prompt

-        # Find and update the existing system message, or add a new one if not found.
         system_message_found = False
         for i, msg in enumerate(self.messages):
             if msg.role == MessageRole.SYSTEM:
@@ -144,113 +99,65 @@ class BaseLLM(ABC):
                 break

         if not system_message_found:
-            # If for some reason no system message was there, insert it at the beginning.
             self.messages.insert(0, Message(MessageRole.SYSTEM, new_system_prompt))
             logging.debug("No existing system message found, inserted new one at the beginning.")

         logging.info(f"LLM instance system prompt updated. New prompt length: {len(new_system_prompt)}")

     def _trigger_on_user_message_added(self, message: Message):
-        """
-        Internal helper to invoke the on_user_message_added hook on every extension.
-
-        Args:
-            message (Message): The user message that was added
-        """
         for extension in self._extension_registry.get_all():
             extension.on_user_message_added(message)

     def _trigger_on_assistant_message_added(self, message: Message):
-        """
-        Internal helper to invoke the on_assistant_message_added hook on every extension.
-
-        Args:
-            message (Message): The assistant message that was added
-        """
         for extension in self._extension_registry.get_all():
             extension.on_assistant_message_added(message)

     async def _execute_before_hooks(self, user_message: LLMUserMessage, **kwargs) -> None:
-        """
-        Execute all registered before_invoke hooks.
-        """
         for extension in self._extension_registry.get_all():
-            await extension.before_invoke(user_message.content, user_message.image_urls, **kwargs)
+            await extension.before_invoke(user_message, **kwargs)

     async def _execute_after_hooks(self, user_message: LLMUserMessage, response: CompleteResponse = None, **kwargs) -> None:
-        """
-        Execute all registered after_invoke hooks.
-
-        Args:
-            user_message (LLMUserMessage): The user message object
-            response (CompleteResponse): The complete response from the LLM
-            **kwargs: Additional arguments for LLM-specific usage
-        """
         for extension in self._extension_registry.get_all():
-            await extension.after_invoke(user_message.content, user_message.image_urls, response, **kwargs)
+            await extension.after_invoke(user_message, response, **kwargs)

     async def send_user_message(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
-        """
-        Sends a user message to the LLM and returns the complete LLM response.
-
-        Args:
-            user_message (LLMUserMessage): The user message object.
-            **kwargs: Additional arguments for LLM-specific usage.
-
-        Returns:
-            CompleteResponse: The complete response from the LLM including content and usage.
-        """
         await self._execute_before_hooks(user_message, **kwargs)
-        response = await self._send_user_message_to_llm(
-            user_message.content,
-            user_message.image_urls if user_message.image_urls else None,
-            **kwargs
-        )
+        response = await self._send_user_message_to_llm(user_message, **kwargs)
         await self._execute_after_hooks(user_message, response, **kwargs)
         return response

     async def stream_user_message(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
-        """
-        Streams the LLM response as ChunkResponse objects.
-
-        Args:
-            user_message (LLMUserMessage): The user message object.
-            **kwargs: Additional arguments for LLM-specific usage.
-
-        Yields:
-            AsyncGenerator[ChunkResponse, None]: ChunkResponse objects from the LLM.
-        """
         await self._execute_before_hooks(user_message, **kwargs)

         accumulated_content = ""
+        accumulated_reasoning = ""
         final_chunk = None

-        async for chunk in self._stream_user_message_to_llm(
-            user_message.content,
-            user_message.image_urls if user_message.image_urls else None,
-            **kwargs
-        ):
-            accumulated_content += chunk.content
+        async for chunk in self._stream_user_message_to_llm(user_message, **kwargs):
+            if chunk.content:
+                accumulated_content += chunk.content
+            if chunk.reasoning:
+                accumulated_reasoning += chunk.reasoning
+
             if chunk.is_complete:
                 final_chunk = chunk
             yield chunk

-        # Create a CompleteResponse from the accumulated content and final chunk's usage
         complete_response = CompleteResponse(
             content=accumulated_content,
+            reasoning=accumulated_reasoning if accumulated_reasoning else None,
             usage=final_chunk.usage if final_chunk else None
         )

         await self._execute_after_hooks(user_message, complete_response, **kwargs)

     @abstractmethod
-    async def _send_user_message_to_llm(self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs) -> CompleteResponse:
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         """
         Abstract method for sending a user message to an LLM. Must be implemented by subclasses.

         Args:
-            user_message (str): The user message content.
-            image_urls (Optional[List[str]]): Optional list of image URLs or file paths.
+            user_message (LLMUserMessage): The user message object.
             **kwargs: Additional arguments for LLM-specific usage.

         Returns:
@@ -259,13 +166,12 @@ class BaseLLM(ABC):
         pass

     @abstractmethod
-    async def _stream_user_message_to_llm(self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
+    async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
         """
         Abstract method for streaming a user message response from the LLM. Must be implemented by subclasses.

         Args:
-            user_message (str): The user message content.
-            image_urls (Optional[List[str]]): Optional list of image URLs or file paths.
+            user_message (LLMUserMessage): The user message object.
             **kwargs: Additional arguments for LLM-specific usage.

         Yields:
@@ -274,9 +180,6 @@ class BaseLLM(ABC):
         pass

     async def cleanup(self):
-        """
-        Perform cleanup operations for the LLM and all extensions.
-        """
        for extension in self._extension_registry.get_all():
            await extension.cleanup()
        self._extension_registry.clear()
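
The core of this release is visible above: BaseLLM's template methods now pass the whole LLMUserMessage through to the provider adapters, and streaming accumulates reasoning alongside content. A minimal sketch of a subclass conforming to the new abstract contract (the EchoLLM class is hypothetical; the ChunkResponse/CompleteResponse keyword arguments are inferred from how stream_user_message consumes them above, not confirmed signatures):

    from typing import AsyncGenerator

    from autobyteus.llm.base_llm import BaseLLM
    from autobyteus.llm.user_message import LLMUserMessage
    from autobyteus.llm.utils.response_types import ChunkResponse, CompleteResponse


    class EchoLLM(BaseLLM):
        """Hypothetical adapter: echoes text back and counts attached media."""

        async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
            # All media lists now ride along on the message object itself.
            n_media = (len(user_message.image_urls)
                       + len(user_message.audio_urls)
                       + len(user_message.video_urls))
            return CompleteResponse(content=f"{user_message.content} [{n_media} attachment(s)]")

        async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
            # Emit one chunk per word, then a final empty chunk marking completion.
            for word in user_message.content.split():
                yield ChunkResponse(content=word + " ", is_complete=False)
            yield ChunkResponse(content="", is_complete=True)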

autobyteus/llm/extensions/base_extension.py

@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
 from typing import List, Optional, TYPE_CHECKING
 from autobyteus.llm.utils.messages import Message
 from autobyteus.llm.utils.response_types import CompleteResponse
+from autobyteus.llm.user_message import LLMUserMessage

 if TYPE_CHECKING:
     from autobyteus.llm.base_llm import BaseLLM
@@ -12,7 +13,7 @@ class LLMExtension(ABC):

     @abstractmethod
     async def before_invoke(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+        self, user_message: LLMUserMessage, **kwargs
     ) -> None:
         """
         Called before invoking the LLM with a user message.
@@ -21,16 +22,15 @@ class LLMExtension(ABC):

     @abstractmethod
     async def after_invoke(
-        self, user_message: str, image_urls: Optional[List[str]] = None, response: CompleteResponse = None, **kwargs
+        self, user_message: LLMUserMessage, response: CompleteResponse = None, **kwargs
     ) -> None:
         """
         Called after receiving the response from the LLM.

         Args:
-            user_message: Original user message
-            image_urls: Optional image URLs used in request
-            response: Complete response including content and usage information
-            kwargs: Additional arguments
+            user_message: The original user message object.
+            response: Complete response including content and usage information.
+            kwargs: Additional arguments.
         """
         pass


autobyteus/llm/extensions/token_usage_tracking_extension.py

@@ -6,6 +6,7 @@ from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.token_usage_tracker import TokenUsageTracker
 from autobyteus.llm.utils.messages import Message, MessageRole
 from autobyteus.llm.utils.response_types import CompleteResponse
+from autobyteus.llm.user_message import LLMUserMessage

 if TYPE_CHECKING:
     from autobyteus.llm.base_llm import BaseLLM
@@ -29,12 +30,12 @@ class TokenUsageTrackingExtension(LLMExtension):
         return self._latest_usage

     async def before_invoke(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+        self, user_message: LLMUserMessage, **kwargs
     ) -> None:
         pass

     async def after_invoke(
-        self, user_message: str, image_urls: Optional[List[str]] = None, response: CompleteResponse = None, **kwargs
+        self, user_message: LLMUserMessage, response: CompleteResponse = None, **kwargs
     ) -> None:
         """
         Get the latest usage from tracker and optionally override token counts with provider's usage if available
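
Both the abstract hook interface and the built-in TokenUsageTrackingExtension now receive the LLMUserMessage object directly, so any third-party extension written against the old (user_message, image_urls) pair must be updated. A sketch of a conforming extension (LoggingExtension is illustrative only; the diff does not show whether the other lifecycle callbacks are abstract, so the no-op overrides below are defensive):

    from autobyteus.llm.extensions.base_extension import LLMExtension
    from autobyteus.llm.user_message import LLMUserMessage
    from autobyteus.llm.utils.messages import Message
    from autobyteus.llm.utils.response_types import CompleteResponse


    class LoggingExtension(LLMExtension):
        async def before_invoke(self, user_message: LLMUserMessage, **kwargs) -> None:
            # Media URLs are read off the message object, not passed separately.
            print(f"sending {user_message.content[:50]!r} "
                  f"with {len(user_message.image_urls)} image(s)")

        async def after_invoke(self, user_message: LLMUserMessage,
                               response: CompleteResponse = None, **kwargs) -> None:
            if response is not None and response.usage:
                print(f"usage: {response.usage}")

        # Defensive no-ops, in case these callbacks are abstract in this version.
        def on_user_message_added(self, message: Message) -> None:
            pass

        def on_assistant_message_added(self, message: Message) -> None:
            pass

        async def cleanup(self) -> None:
            pass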

autobyteus/llm/llm_factory.py

@@ -10,6 +10,7 @@ from autobyteus.llm.utils.llm_config import LLMConfig, TokenPricingConfig
 from autobyteus.llm.base_llm import BaseLLM

 from autobyteus.llm.api.claude_llm import ClaudeLLM
+from autobyteus.llm.api.bedrock_llm import BedrockLLM
 from autobyteus.llm.api.mistral_llm import MistralLLM
 from autobyteus.llm.api.openai_llm import OpenAILLM
 from autobyteus.llm.api.deepseek_llm import DeepSeekLLM
@@ -61,6 +62,83 @@ class LLMFactory(metaclass=SingletonMeta):
                 pricing_config=TokenPricingConfig(2.50, 10.00)
             )
         ),
+        LLMModel(
+            name="gpt-5",
+            value="gpt-5",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-5",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(1.25, 10.00)
+            )
+        ),
+        LLMModel(
+            name="gpt-5-mini",
+            value="gpt-5-mini",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-5-mini",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(0.25, 2.00)
+            )
+        ),
+        LLMModel(
+            name="gpt-5-nano",
+            value="gpt-5-nano",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-5-nano",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(0.05, 0.40)
+            )
+        ),
+        LLMModel(
+            name="gpt-5-chat-latest",
+            value="gpt-5-chat-latest",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-5-chat-latest",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(1.25, 10.00)
+            )
+        ),
+        LLMModel(
+            name="gpt-4.1",
+            value="gpt-4.1",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-4.1",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(2.00, 8.00)
+            )
+        ),
+        LLMModel(
+            name="gpt-4.1-mini",
+            value="gpt-4.1-mini",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-4.1-mini",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(0.40, 1.60)
+            )
+        ),
+        LLMModel(
+            name="gpt-4.1-nano",
+            value="gpt-4.1-nano",
+            provider=LLMProvider.OPENAI,
+            llm_class=OpenAILLM,
+            canonical_name="gpt-4.1-nano",
+            default_config=LLMConfig(
+                uses_max_completion_tokens=True,
+                pricing_config=TokenPricingConfig(0.10, 0.40)
+            )
+        ),
         LLMModel(
             name="o3",
             value="o3",
@@ -68,6 +146,7 @@ class LLMFactory(metaclass=SingletonMeta):
             llm_class=OpenAILLM,
             canonical_name="o3",
             default_config=LLMConfig(
+                uses_max_completion_tokens=True,
                 pricing_config=TokenPricingConfig(15.00, 60.00)
             )
         ),
@@ -78,6 +157,7 @@ class LLMFactory(metaclass=SingletonMeta):
             llm_class=OpenAILLM,
             canonical_name="o4-mini",
             default_config=LLMConfig(
+                uses_max_completion_tokens=True,
                 pricing_config=TokenPricingConfig(1.0, 4.00)
             )
         ),
@@ -104,12 +184,13 @@ class LLMFactory(metaclass=SingletonMeta):
             )
         ),
         LLMModel(
-            name="bedrock-claude-4-opus",
-            value="anthropic.claude-opus-4-20250514-v1:0",
+            name="claude-4.1-opus",
+            value="claude-opus-4-1-20250805",
             provider=LLMProvider.ANTHROPIC,
             llm_class=ClaudeLLM,
-            canonical_name="claude-4-opus",
+            canonical_name="claude-4.1-opus",
             default_config=LLMConfig(
+                # NOTE: Pricing is assumed to be the same as claude-4-opus
                 pricing_config=TokenPricingConfig(15.00, 75.00)
             )
         ),
@@ -123,11 +204,32 @@ class LLMFactory(metaclass=SingletonMeta):
                 pricing_config=TokenPricingConfig(3.00, 15.00)
             )
         ),
+        LLMModel(
+            name="bedrock-claude-4-opus",
+            value="anthropic.claude-opus-4-20250514-v1:0",
+            provider=LLMProvider.ANTHROPIC,
+            llm_class=BedrockLLM,
+            canonical_name="claude-4-opus",
+            default_config=LLMConfig(
+                pricing_config=TokenPricingConfig(15.00, 75.00)
+            )
+        ),
+        LLMModel(
+            name="bedrock-claude-4.1-opus",
+            value="anthropic.claude-opus-4-1-20250805-v1:0",
+            provider=LLMProvider.ANTHROPIC,
+            llm_class=BedrockLLM,
+            canonical_name="claude-4.1-opus",
+            default_config=LLMConfig(
+                # NOTE: Pricing is assumed to be the same as claude-4-opus
+                pricing_config=TokenPricingConfig(15.00, 75.00)
+            )
+        ),
         LLMModel(
             name="bedrock-claude-4-sonnet",
             value="anthropic.claude-sonnet-4-20250514-v1:0",
             provider=LLMProvider.ANTHROPIC,
-            llm_class=ClaudeLLM,
+            llm_class=BedrockLLM,
             canonical_name="claude-4-sonnet",
             default_config=LLMConfig(
                 pricing_config=TokenPricingConfig(3.00, 15.00)
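
The TokenPricingConfig pairs registered above line up with the providers' published per-million-token prices (1.25/10.00 for gpt-5, 15.00/75.00 for the Opus models, and so on). Assuming that unit, a rough per-call cost estimate can be derived as below; the helper is illustrative and not part of the package:

    def estimate_cost_usd(prompt_tokens: int, completion_tokens: int,
                          input_price_per_m: float, output_price_per_m: float) -> float:
        """Estimate call cost, assuming prices are USD per 1M tokens."""
        return (prompt_tokens * input_price_per_m
                + completion_tokens * output_price_per_m) / 1_000_000


    # e.g. a gpt-5 call (TokenPricingConfig(1.25, 10.00)) with a 12k-token prompt
    # and a 2k-token completion:
    # estimate_cost_usd(12_000, 2_000, 1.25, 10.00) == 0.035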

autobyteus/llm/token_counter/token_counter_factory.py

@@ -25,7 +25,7 @@ def get_token_counter(model: LLMModel, llm: 'BaseLLM') -> BaseTokenCounter:
     if model.provider == LLMProvider.OPENAI:
         return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.ANTHROPIC:
-        return ClaudeTokenCounter(model, llm)
+        return OpenAITokenCounter(model, llm)
     elif model.provider == LLMProvider.MISTRAL:
         return MistralTokenCounter(model, llm)
     elif model.provider == LLMProvider.DEEPSEEK:

autobyteus/llm/user_message.py

@@ -7,53 +7,65 @@ logger = logging.getLogger(__name__)
 class LLMUserMessage:
     """
     Represents a user message formatted specifically for input to an LLM.
-    It includes content and optionally image URLs.
-    This structure is typically used when constructing prompts for multimodal LLMs
-    or when sending a "user" role message in a conversation.
+    It includes content and optionally URLs for various media types.
+    This structure is typically used when constructing prompts for multimodal LLMs.
     """
     def __init__(self,
                  content: str,
-                 image_urls: Optional[List[str]] = None):
+                 image_urls: Optional[List[str]] = None,
+                 audio_urls: Optional[List[str]] = None,
+                 video_urls: Optional[List[str]] = None):
         """
         Initializes an LLMUserMessage.

         Args:
             content: The textual content of the user's message.
-            image_urls: An optional list of URLs or local paths to images
-                to be included with the message for the LLM.
+            image_urls: An optional list of URLs or local paths to images.
+            audio_urls: An optional list of URLs or local paths to audio files.
+            video_urls: An optional list of URLs or local paths to video files.
         """
-        if not isinstance(content, str):
-            # Allow empty string for content, as images might be the only input.
-            # But content must still be a string type.
-            pass # Validation can be more strict if empty content is disallowed with no images
-
-        if image_urls is None:
-            image_urls = [] # Default to empty list for easier processing
+        self.content: str = content
+        self.image_urls: List[str] = image_urls or []
+        self.audio_urls: List[str] = audio_urls or []
+        self.video_urls: List[str] = video_urls or []

-        if not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
+        # --- Validation ---
+        if not isinstance(self.content, str):
+            raise TypeError("LLMUserMessage 'content' must be a string.")
+        if not (isinstance(self.image_urls, list) and all(isinstance(url, str) for url in self.image_urls)):
             raise TypeError("LLMUserMessage 'image_urls' must be a list of strings.")
-
-        if not content and not image_urls:
-            raise ValueError("LLMUserMessage must have either content or image_urls or both.")
+        if not (isinstance(self.audio_urls, list) and all(isinstance(url, str) for url in self.audio_urls)):
+            raise TypeError("LLMUserMessage 'audio_urls' must be a list of strings.")
+        if not (isinstance(self.video_urls, list) and all(isinstance(url, str) for url in self.video_urls)):
+            raise TypeError("LLMUserMessage 'video_urls' must be a list of strings.")

-        self.content: str = content
-        self.image_urls: List[str] = image_urls
+        if not self.content and not self.image_urls and not self.audio_urls and not self.video_urls:
+            raise ValueError("LLMUserMessage must have either content or at least one media URL.")

-        logger.debug(f"LLMUserMessage created. Content: '{content[:50]}...', Image URLs: {image_urls}")
+        logger.debug(f"LLMUserMessage created. Content: '{self.content[:50]}...', "
+                     f"Images: {len(self.image_urls)}, Audio: {len(self.audio_urls)}, Video: {len(self.video_urls)}")

     def __repr__(self) -> str:
-        image_urls_repr = f", image_urls={self.image_urls}" if self.image_urls else ""
-        return f"LLMUserMessage(content='{self.content[:100]}...'{image_urls_repr})"
+        parts = [f"content='{self.content[:100]}...'"]
+        if self.image_urls:
+            parts.append(f"image_urls={self.image_urls}")
+        if self.audio_urls:
+            parts.append(f"audio_urls={self.audio_urls}")
+        if self.video_urls:
+            parts.append(f"video_urls={self.video_urls}")
+        return f"LLMUserMessage({', '.join(parts)})"

     def to_dict(self) -> Dict[str, Any]:
         """
-        Serializes the LLMUserMessage to a dictionary. This method might be less used
-        now that BaseLLM._add_user_message handles the conversion to the Message format.
-        Kept for potential direct use or testing.
+        Serializes the LLMUserMessage to a dictionary.
         """
         data = {"content": self.content}
         if self.image_urls:
             data["image_urls"] = self.image_urls
+        if self.audio_urls:
+            data["audio_urls"] = self.audio_urls
+        if self.video_urls:
+            data["video_urls"] = self.video_urls
         return data

     @classmethod
@@ -61,13 +73,9 @@ class LLMUserMessage:
         """
         Deserializes an LLMUserMessage from a dictionary.
         """
-        content = data.get("content", "") # Default to empty string if not present
-        image_urls = data.get("image_urls") # Expects a list or None
-
-        # Basic validation, more can be added if needed
-        if not isinstance(content, str):
-            raise ValueError("LLMUserMessage 'content' in dictionary must be a string.")
-        if image_urls is not None and not (isinstance(image_urls, list) and all(isinstance(url, str) for url in image_urls)):
-            raise ValueError("LLMUserMessage 'image_urls' in dictionary must be a list of strings if provided.")
-
-        return cls(content=content, image_urls=image_urls)
+        return cls(
+            content=data.get("content", ""),
+            image_urls=data.get("image_urls"),
+            audio_urls=data.get("audio_urls"),
+            video_urls=data.get("video_urls")
+        )
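
With the new fields in place, a multimodal message round-trips cleanly through to_dict/from_dict: keys for empty media lists are omitted on serialization and default back to empty lists on load. For instance:

    from autobyteus.llm.user_message import LLMUserMessage

    msg = LLMUserMessage(content="Describe this clip.", video_urls=["/tmp/clip.mp4"])
    data = msg.to_dict()
    # {'content': 'Describe this clip.', 'video_urls': ['/tmp/clip.mp4']}

    restored = LLMUserMessage.from_dict(data)
    assert restored.video_urls == ["/tmp/clip.mp4"]
    assert restored.image_urls == []   # omitted keys come back as empty lists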