autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  3. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  4. autobyteus/agent/message/__init__.py +7 -5
  5. autobyteus/agent/message/agent_input_user_message.py +6 -16
  6. autobyteus/agent/message/context_file.py +24 -24
  7. autobyteus/agent/message/context_file_type.py +29 -8
  8. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  9. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  10. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  11. autobyteus/agent/tool_invocation.py +2 -1
  12. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  13. autobyteus/agent_team/context/agent_team_config.py +1 -0
  14. autobyteus/llm/api/autobyteus_llm.py +33 -33
  15. autobyteus/llm/api/bedrock_llm.py +13 -5
  16. autobyteus/llm/api/claude_llm.py +13 -27
  17. autobyteus/llm/api/gemini_llm.py +108 -42
  18. autobyteus/llm/api/groq_llm.py +4 -3
  19. autobyteus/llm/api/mistral_llm.py +97 -51
  20. autobyteus/llm/api/nvidia_llm.py +6 -5
  21. autobyteus/llm/api/ollama_llm.py +37 -12
  22. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  23. autobyteus/llm/autobyteus_provider.py +1 -1
  24. autobyteus/llm/base_llm.py +42 -139
  25. autobyteus/llm/extensions/base_extension.py +6 -6
  26. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  27. autobyteus/llm/llm_factory.py +106 -4
  28. autobyteus/llm/token_counter/token_counter_factory.py +1 -1
  29. autobyteus/llm/user_message.py +43 -35
  30. autobyteus/llm/utils/llm_config.py +34 -18
  31. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  32. autobyteus/llm/utils/messages.py +32 -25
  33. autobyteus/llm/utils/response_types.py +9 -3
  34. autobyteus/llm/utils/token_usage.py +6 -5
  35. autobyteus/multimedia/__init__.py +31 -0
  36. autobyteus/multimedia/audio/__init__.py +11 -0
  37. autobyteus/multimedia/audio/api/__init__.py +4 -0
  38. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  39. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  40. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  41. autobyteus/multimedia/audio/audio_model.py +96 -0
  42. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  43. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  44. autobyteus/multimedia/image/__init__.py +11 -0
  45. autobyteus/multimedia/image/api/__init__.py +9 -0
  46. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  47. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  48. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  49. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  50. autobyteus/multimedia/image/base_image_client.py +67 -0
  51. autobyteus/multimedia/image/image_client_factory.py +118 -0
  52. autobyteus/multimedia/image/image_model.py +96 -0
  53. autobyteus/multimedia/providers.py +5 -0
  54. autobyteus/multimedia/runtimes.py +8 -0
  55. autobyteus/multimedia/utils/__init__.py +10 -0
  56. autobyteus/multimedia/utils/api_utils.py +19 -0
  57. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  58. autobyteus/multimedia/utils/response_types.py +13 -0
  59. autobyteus/tools/__init__.py +3 -0
  60. autobyteus/tools/multimedia/__init__.py +8 -0
  61. autobyteus/tools/multimedia/audio_tools.py +116 -0
  62. autobyteus/tools/multimedia/image_tools.py +186 -0
  63. autobyteus/tools/tool_category.py +1 -0
  64. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  65. autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
  66. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  67. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  68. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
  69. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
  70. examples/run_browser_agent.py +1 -1
  71. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  72. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
  73. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
  74. {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
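The recurring change across the provider diffs below is that per-call `image_urls` parameters are replaced by multimodal message objects, and the removed `image_payload_formatter` is superseded by the new `media_payload_formatter` helpers (`image_source_to_base64`, `get_mime_type`, `is_valid_image_path`, `create_data_uri`). The following self-contained sketch only illustrates the data-URI pattern those helpers appear to implement; the helper function below is hypothetical and does not import autobyteus, since the helpers' full signatures are not shown in this diff.

# Hypothetical, stdlib-only sketch of the data-URI pattern used by the new
# media_payload_formatter helpers in the hunks below.
import base64
import mimetypes


def local_image_to_data_uri_part(path: str) -> dict:
    """Read a local image and wrap it as an OpenAI-style image_url content part."""
    mime_type, _ = mimetypes.guess_type(path)      # analogous to get_mime_type()
    mime_type = mime_type or "image/jpeg"          # same fallback the diffs use
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()  # analogous to image_source_to_base64()
    return {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}}


# Example of a multimodal "content parts" list as built in the Mistral/OpenAI hunks below:
# parts = [{"type": "text", "text": "Describe this image."},
#          local_image_to_data_uri_part("photo.png")]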
autobyteus/llm/api/mistral_llm.py

@@ -1,45 +1,91 @@
- from typing import Dict, Optional, List, AsyncGenerator
+ from typing import Dict, Optional, List, Any, AsyncGenerator, Union
  import os
  import logging
+ import httpx
+ import asyncio
  from autobyteus.llm.models import LLMModel
  from autobyteus.llm.base_llm import BaseLLM
  from mistralai import Mistral
- from autobyteus.llm.utils.messages import MessageRole, Message
+ from autobyteus.llm.utils.messages import Message, MessageRole
  from autobyteus.llm.utils.llm_config import LLMConfig
  from autobyteus.llm.utils.token_usage import TokenUsage
  from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+ from autobyteus.llm.user_message import LLMUserMessage
+ from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, get_mime_type, is_valid_image_path

- # Configure logger
  logger = logging.getLogger(__name__)

+ async def _format_mistral_messages(messages: List[Message]) -> List[Dict[str, Any]]:
+     """Formats a list of internal Message objects into a list of dictionaries for the Mistral API."""
+     mistral_messages = []
+     for msg in messages:
+         # Skip empty messages from non-system roles as the Mistral API may reject them
+         if not msg.content and not msg.image_urls and msg.role != MessageRole.SYSTEM:
+             continue
+
+         content: Union[str, List[Dict[str, Any]]]
+
+         if msg.image_urls:
+             content_parts: List[Dict[str, Any]] = []
+             if msg.content:
+                 content_parts.append({"type": "text", "text": msg.content})
+
+             image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+             try:
+                 base64_images = await asyncio.gather(*image_tasks)
+                 for i, b64_image in enumerate(base64_images):
+                     original_url = msg.image_urls[i]
+                     mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                     data_uri = f"data:{mime_type};base64,{b64_image}"
+
+                     # Mistral's format for image parts
+                     content_parts.append({
+                         "type": "image_url",
+                         "image_url": {
+                             "url": data_uri
+                         }
+                     })
+             except Exception as e:
+                 logger.error(f"Error processing images for Mistral: {e}")
+
+             if msg.audio_urls:
+                 logger.warning("MistralLLM does not yet support audio; skipping.")
+             if msg.video_urls:
+                 logger.warning("MistralLLM does not yet support video; skipping.")
+
+             content = content_parts
+         else:
+             content = msg.content or ""
+
+         mistral_messages.append({"role": msg.role.value, "content": content})
+
+     return mistral_messages
+
+
  class MistralLLM(BaseLLM):
      def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-         # Provide defaults if not specified
          if model is None:
-             model = LLMModel.mistral_large
+             model = LLMModel['mistral-large']
          if llm_config is None:
              llm_config = LLMConfig()

          super().__init__(model=model, llm_config=llm_config)
-         self.client = self.initialize()
+         self.http_client = httpx.AsyncClient()
+         self.client: Mistral = self._initialize()
          logger.info(f"MistralLLM initialized with model: {self.model}")

-     @classmethod
-     def initialize(cls):
+     def _initialize(self) -> Mistral:
          mistral_api_key = os.environ.get("MISTRAL_API_KEY")
          if not mistral_api_key:
              logger.error("MISTRAL_API_KEY environment variable is not set")
-             raise ValueError(
-                 "MISTRAL_API_KEY environment variable is not set. "
-                 "Please set this variable in your environment."
-             )
+             raise ValueError("MISTRAL_API_KEY environment variable is not set.")
          try:
-             return Mistral(api_key=mistral_api_key)
+             return Mistral(api_key=mistral_api_key, client=self.http_client)
          except Exception as e:
              logger.error(f"Failed to initialize Mistral client: {str(e)}")
              raise ValueError(f"Failed to initialize Mistral client: {str(e)}")

-     def _create_token_usage(self, usage_data: Dict) -> TokenUsage:
+     def _create_token_usage(self, usage_data: Any) -> TokenUsage:
          """Convert Mistral usage data to TokenUsage format."""
          return TokenUsage(
              prompt_tokens=usage_data.prompt_tokens,

@@ -48,26 +94,26 @@ class MistralLLM(BaseLLM):
          )

      async def _send_user_message_to_llm(
-         self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+         self, user_message: LLMUserMessage, **kwargs
      ) -> CompleteResponse:
          self.add_user_message(user_message)
-
+
          try:
-             mistral_messages = [msg.to_mistral_message() for msg in self.messages]
+             mistral_messages = await _format_mistral_messages(self.messages)

-             chat_response = self.client.chat.complete(
+             chat_response = await self.client.chat.complete_async(
                  model=self.model.value,
                  messages=mistral_messages,
+                 temperature=self.config.temperature,
+                 max_tokens=self.config.max_tokens,
+                 top_p=self.config.top_p,
              )

-             assistant_message = chat_response.choices.message.content
+             assistant_message = chat_response.choices[0].message.content
              self.add_assistant_message(assistant_message)

-             # Create token usage if available
-             token_usage = None
-             if hasattr(chat_response, 'usage') and chat_response.usage:
-                 token_usage = self._create_token_usage(chat_response.usage)
-                 logger.debug(f"Token usage recorded: {token_usage}")
+             token_usage = self._create_token_usage(chat_response.usage)
+             logger.debug(f"Token usage recorded: {token_usage}")

              return CompleteResponse(
                  content=assistant_message,

@@ -78,48 +124,48 @@ class MistralLLM(BaseLLM):
              raise ValueError(f"Error in Mistral API call: {str(e)}")

      async def _stream_user_message_to_llm(
-         self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+         self, user_message: LLMUserMessage, **kwargs
      ) -> AsyncGenerator[ChunkResponse, None]:
          self.add_user_message(user_message)

+         accumulated_message = ""
+         final_usage = None
+
          try:
-             mistral_messages = [msg.to_mistral_message() for msg in self.messages]
-
-             stream = await self.client.chat.stream_async(
+             mistral_messages = await _format_mistral_messages(self.messages)
+
+             stream = self.client.chat.stream_async(
                  model=self.model.value,
                  messages=mistral_messages,
+                 temperature=self.config.temperature,
+                 max_tokens=self.config.max_tokens,
+                 top_p=self.config.top_p,
              )

-             accumulated_message = ""
-
              async for chunk in stream:
-                 if chunk.data.choices.delta.content is not None:
-                     token = chunk.data.choices.delta.content
+                 if chunk.choices and chunk.choices[0].delta.content is not None:
+                     token = chunk.choices[0].delta.content
                      accumulated_message += token

-                     # For intermediate chunks, yield without usage
-                     yield ChunkResponse(
-                         content=token,
-                         is_complete=False
-                     )
-
-                 # Check if this is the last chunk with usage data
-                 if hasattr(chunk.data, 'usage') and chunk.data.usage is not None:
-                     token_usage = self._create_token_usage(chunk.data.usage)
-                     yield ChunkResponse(
-                         content="",
-                         is_complete=True,
-                         usage=token_usage
-                     )
-
-             # After streaming is complete, store the full message
+                     yield ChunkResponse(content=token, is_complete=False)
+
+                 if hasattr(chunk, 'usage') and chunk.usage:
+                     final_usage = self._create_token_usage(chunk.usage)
+
+             # Yield the final chunk with usage data
+             yield ChunkResponse(
+                 content="",
+                 is_complete=True,
+                 usage=final_usage
+             )
+
              self.add_assistant_message(accumulated_message)
          except Exception as e:
              logger.error(f"Error in Mistral API streaming call: {str(e)}")
              raise ValueError(f"Error in Mistral API streaming call: {str(e)}")

      async def cleanup(self):
-         # Clean up any resources if needed
          logger.debug("Cleaning up MistralLLM instance")
-         self.messages = []
-         super().cleanup()
+         if self.http_client and not self.http_client.is_closed:
+             await self.http_client.aclose()
+         await super().cleanup()
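The Mistral client now calls `chat.complete_async(...)` and forwards sampling settings from `LLMConfig`, and it indexes `choices[0]` instead of treating `.choices` as a single object. A minimal, self-contained sketch of that call path against the mistralai SDK; it assumes MISTRAL_API_KEY is set, and the model name "mistral-large-latest" is an assumption, not taken from this diff.

# Hedged sketch of the new non-streaming Mistral call path (mistralai >= 1.0 assumed).
import asyncio
import os
from mistralai import Mistral


async def main() -> None:
    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
    response = await client.chat.complete_async(
        model="mistral-large-latest",          # assumed model name
        messages=[{"role": "user", "content": "Say hello in one word."}],
        temperature=0.7,                       # the diff forwards these from LLMConfig
        max_tokens=64,
        top_p=1.0,
    )
    # The diff's fix: index choices[0] rather than accessing .choices.message directly.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())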
autobyteus/llm/api/nvidia_llm.py

@@ -8,6 +8,7 @@ from autobyteus.llm.utils.llm_config import LLMConfig
  from autobyteus.llm.utils.messages import MessageRole, Message
  from autobyteus.llm.utils.token_usage import TokenUsage
  from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+ from autobyteus.llm.user_message import LLMUserMessage

  logger = logging.getLogger(__name__)


@@ -38,11 +39,11 @@ class NvidiaLLM(BaseLLM):
          except Exception as e:
              raise ValueError(f"Failed to initialize Nvidia client: {str(e)}")

-     async def _send_user_message_to_llm(self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs) -> CompleteResponse:
+     async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
          self.add_user_message(user_message)
          try:
              completion = self.client.chat.completions.create(
-                 model=self.model,
+                 model=self.model.value,
                  messages=[msg.to_dict() for msg in self.messages],
                  temperature=0,
                  top_p=1,

@@ -65,12 +66,12 @@ class NvidiaLLM(BaseLLM):
          except Exception as e:
              raise ValueError(f"Error in Nvidia API call: {str(e)}")

-     async def stream_response(self, user_message: str) -> AsyncGenerator[ChunkResponse, None]:
+     async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
          self.add_user_message(user_message)
          complete_response = ""
          try:
              completion = self.client.chat.completions.create(
-                 model=self.model,
+                 model=self.model.value,
                  messages=[msg.to_dict() for msg in self.messages],
                  temperature=0,
                  top_p=1,

@@ -104,4 +105,4 @@ class NvidiaLLM(BaseLLM):
              raise ValueError(f"Error in Nvidia API streaming call: {str(e)}")

      async def cleanup(self):
-         super().cleanup()
+         await super().cleanup()
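Several providers in this release change `super().cleanup()` to `await super().cleanup()`. Calling an async method without awaiting it only creates a coroutine object that never runs (and raises a "coroutine was never awaited" RuntimeWarning), so the base class cleanup silently never executed. A tiny self-contained illustration; the class names here are illustrative, not autobyteus APIs.

# Why the diffs add `await` to super().cleanup(); illustrative classes only.
import asyncio


class Base:
    async def cleanup(self) -> None:
        print("base cleanup ran")


class Broken(Base):
    async def cleanup(self) -> None:
        super().cleanup()        # coroutine created but never awaited: base cleanup never runs


class Fixed(Base):
    async def cleanup(self) -> None:
        await super().cleanup()  # base cleanup actually executes


async def main() -> None:
    await Broken().cleanup()     # prints nothing (and warns about an un-awaited coroutine)
    await Fixed().cleanup()      # prints "base cleanup ran"


asyncio.run(main())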
autobyteus/llm/api/ollama_llm.py

@@ -1,21 +1,22 @@
- from typing import Dict, Optional, List, AsyncGenerator
+ from typing import Dict, Optional, List, AsyncGenerator, Any
  from ollama import AsyncClient, ChatResponse, ResponseError
+ from ollama import Image  # FIX: Import the Image type from the ollama library
  from autobyteus.llm.models import LLMModel
  from autobyteus.llm.base_llm import BaseLLM
  from autobyteus.llm.utils.llm_config import LLMConfig
- from autobyteus.llm.utils.messages import MessageRole, Message
+ from autobyteus.llm.utils.messages import Message
  from autobyteus.llm.utils.token_usage import TokenUsage
  from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+ from autobyteus.llm.user_message import LLMUserMessage
+ from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64
  import logging
  import asyncio
  import httpx
- import os

  logger = logging.getLogger(__name__)

  class OllamaLLM(BaseLLM):
      def __init__(self, model: LLMModel, llm_config: LLMConfig):
-         # The host URL is now passed via the model object, decoupling from environment variables here.
          if not model.host_url:
              raise ValueError("OllamaLLM requires a host_url to be set in its LLMModel object.")


@@ -26,16 +27,41 @@ class OllamaLLM(BaseLLM):
          super().__init__(model=model, llm_config=llm_config)
          logger.info(f"OllamaLLM initialized with model: {self.model.model_identifier}")

-     async def _send_user_message_to_llm(self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs) -> CompleteResponse:
+     async def _format_ollama_messages(self) -> List[Dict[str, Any]]:
+         """
+         Formats the conversation history for the Ollama API, including multimodal content.
+         """
+         formatted_messages = []
+         for msg in self.messages:
+             msg_dict = {
+                 "role": msg.role.value,
+                 "content": msg.content or ""
+             }
+             if msg.image_urls:
+                 try:
+                     # Concurrently process all images using the centralized utility
+                     image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+                     prepared_base64_images = await asyncio.gather(*image_tasks)
+                     if prepared_base64_images:
+                         # FIX: Wrap each base64 string in the official ollama.Image object
+                         msg_dict["images"] = [Image(value=b64_string) for b64_string in prepared_base64_images]
+                 except Exception as e:
+                     logger.error(f"Error processing images for Ollama, skipping them. Error: {e}")
+
+             formatted_messages.append(msg_dict)
+         return formatted_messages
+
+     async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
          self.add_user_message(user_message)
+
          try:
+             formatted_messages = await self._format_ollama_messages()
              response: ChatResponse = await self.client.chat(
                  model=self.model.value,
-                 messages=[msg.to_dict() for msg in self.messages]
+                 messages=formatted_messages
              )
              assistant_message = response['message']['content']

-             # Detect and process reasoning content using <think> markers
              reasoning_content = None
              main_content = assistant_message
              if "<think>" in assistant_message and "</think>" in assistant_message:

@@ -69,7 +95,7 @@ class OllamaLLM(BaseLLM):
              raise

      async def _stream_user_message_to_llm(
-         self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+         self, user_message: LLMUserMessage, **kwargs
      ) -> AsyncGenerator[ChunkResponse, None]:
          self.add_user_message(user_message)
          accumulated_main = ""

@@ -78,17 +104,16 @@ class OllamaLLM(BaseLLM):
          final_response = None

          try:
+             formatted_messages = await self._format_ollama_messages()
              async for part in await self.client.chat(
                  model=self.model.value,
-                 messages=[msg.to_dict() for msg in self.messages],
+                 messages=formatted_messages,
                  stream=True
              ):
                  token = part['message']['content']

-                 # Simple state machine for <think> tags
                  if "<think>" in token:
                      in_reasoning = True
-                     # In case token is like "...</think><think>...", handle it
                      parts = token.split("<think>")
                      token = parts[-1]


@@ -130,4 +155,4 @@ class OllamaLLM(BaseLLM):
              raise

      async def cleanup(self):
-         await super().cleanup()
+         await super().cleanup()
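The notable fix here is wrapping each base64 string in `ollama.Image` before placing it in a message's `images` field. A minimal sketch of that pattern against the `ollama` Python client; it assumes ollama>=0.4, a locally running Ollama server, and a vision-capable model tag ("llava" is an assumption, adjust as needed).

# Hedged sketch of the ollama.Image pattern introduced by this hunk.
import asyncio
import base64
from ollama import AsyncClient, Image


async def main() -> None:
    with open("photo.png", "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    client = AsyncClient(host="http://localhost:11434")
    response = await client.chat(
        model="llava",  # assumed vision-capable model tag
        messages=[{
            "role": "user",
            "content": "What is in this picture?",
            # The fix: wrap each base64 payload in the library's Image type.
            "images": [Image(value=b64)],
        }],
    )
    print(response["message"]["content"])


asyncio.run(main())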
autobyteus/llm/api/openai_compatible_llm.py

@@ -1,20 +1,62 @@
  import logging
  import os
  from abc import ABC
- from typing import Optional, List, AsyncGenerator
+ from typing import Optional, List, AsyncGenerator, Dict, Any
  from openai import OpenAI
  from openai.types.completion_usage import CompletionUsage
  from openai.types.chat import ChatCompletionChunk
+ import asyncio

  from autobyteus.llm.base_llm import BaseLLM
  from autobyteus.llm.models import LLMModel
  from autobyteus.llm.utils.llm_config import LLMConfig
- from autobyteus.llm.utils.image_payload_formatter import process_image
+ from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, create_data_uri, get_mime_type, is_valid_image_path
  from autobyteus.llm.utils.token_usage import TokenUsage
  from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+ from autobyteus.llm.user_message import LLMUserMessage
+ from autobyteus.llm.utils.messages import Message

  logger = logging.getLogger(__name__)

+ async def _format_openai_history(messages: List[Message]) -> List[Dict[str, Any]]:
+     """A local async function to format history for the OpenAI SDK, handling image processing."""
+     formatted_messages = []
+     for msg in messages:
+         # For multimodal messages, build the content list of parts
+         if msg.image_urls or msg.audio_urls or msg.video_urls:
+             content_parts: List[Dict[str, Any]] = []
+             if msg.content:
+                 content_parts.append({"type": "text", "text": msg.content})
+
+             image_tasks = []
+             if msg.image_urls:
+                 for url in msg.image_urls:
+                     # Create an async task for each image to process them concurrently
+                     image_tasks.append(image_source_to_base64(url))
+
+             try:
+                 base64_images = await asyncio.gather(*image_tasks)
+                 for i, b64_image in enumerate(base64_images):
+                     original_url = msg.image_urls[i]
+                     # Determine mime type from original path if possible, otherwise default
+                     mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                     content_parts.append(create_data_uri(mime_type, b64_image))
+             except Exception as e:
+                 logger.error(f"Error processing one or more images: {e}")
+
+             # Placeholder for future audio/video processing
+             if msg.audio_urls:
+                 logger.warning("OpenAI compatible layer does not yet support audio; skipping.")
+             if msg.video_urls:
+                 logger.warning("OpenAI compatible layer does not yet support video; skipping.")
+
+             formatted_messages.append({"role": msg.role.value, "content": content_parts})
+         else:
+             # For text-only messages, use the simple string format
+             formatted_messages.append({"role": msg.role.value, "content": msg.content})
+     return formatted_messages
+
+
  class OpenAICompatibleLLM(BaseLLM, ABC):
      def __init__(
          self,

@@ -24,18 +66,6 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
          base_url: str,
          api_key_default: Optional[str] = None
      ):
-         """
-         Initializes an OpenAI-compatible LLM.
-
-         Args:
-             model (LLMModel): The model to use.
-             llm_config (LLMConfig): Configuration for the LLM.
-             api_key_env_var (str): The name of the environment variable for the API key.
-             base_url (str): The base URL for the API.
-             api_key_default (Optional[str], optional): A default API key to use if the
-                                                        environment variable is not set.
-                                                        Defaults to None.
-         """
          api_key = os.getenv(api_key_env_var)
          if not api_key:
              if api_key_default:

@@ -49,13 +79,11 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
          logger.info(f"Initialized OpenAI compatible client with base_url: {base_url}")

          super().__init__(model=model, llm_config=llm_config)
-         self.max_tokens = 8000  # A default, can be overridden by subclass or config
+         self.max_tokens = 8000

      def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-         """Convert usage data to TokenUsage format."""
          if not usage_data:
              return None
-
          return TokenUsage(
              prompt_tokens=usage_data.prompt_tokens,
              completion_tokens=usage_data.completion_tokens,

@@ -63,53 +91,41 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
          )

      async def _send_user_message_to_llm(
-         self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+         self, user_message: LLMUserMessage, **kwargs
      ) -> CompleteResponse:
-         """
-         Sends a non-streaming request to an OpenAI-compatible API.
-         Supports optional reasoning content if provided in the response.
-         """
-         content = []
-
-         if user_message:
-             content.append({"type": "text", "text": user_message})
-
-         if image_urls:
-             for image_url in image_urls:
-                 try:
-                     image_content = process_image(image_url)
-                     content.append(image_content)
-                     logger.info(f"Processed image: {image_url}")
-                 except ValueError as e:
-                     logger.error(f"Error processing image {image_url}: {str(e)}")
-                     continue
-
-         self.add_user_message(content)
-         logger.debug(f"Prepared message content: {content}")
-
+         self.add_user_message(user_message)
+
          try:
+             formatted_messages = await _format_openai_history(self.messages)
              logger.info(f"Sending request to {self.model.provider.value} API")
-             response = self.client.chat.completions.create(
-                 model=self.model.value,
-                 messages=[msg.to_dict() for msg in self.messages],
-                 max_tokens=self.max_tokens,
-             )
+
+             params: Dict[str, Any] = {
+                 "model": self.model.value,
+                 "messages": formatted_messages,
+             }
+
+             if self.config.uses_max_completion_tokens:
+                 params["max_completion_tokens"] = self.max_tokens
+             else:
+                 params["max_tokens"] = self.max_tokens
+
+             response = self.client.chat.completions.create(**params)
              full_message = response.choices[0].message

-             # Extract reasoning_content if present
+             # --- PRESERVED ORIGINAL LOGIC ---
              reasoning = None
              if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
                  reasoning = full_message.reasoning_content
              elif "reasoning_content" in full_message and full_message["reasoning_content"]:
                  reasoning = full_message["reasoning_content"]

-             # Extract main content
              main_content = ""
              if hasattr(full_message, "content") and full_message.content:
                  main_content = full_message.content
              elif "content" in full_message and full_message["content"]:
                  main_content = full_message["content"]
-
+             # --- END PRESERVED LOGIC ---
+
              self.add_assistant_message(main_content, reasoning_content=reasoning)

              token_usage = self._create_token_usage(response.usage)

@@ -125,43 +141,30 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
              raise ValueError(f"Error in {self.model.provider.value} API request: {str(e)}")

      async def _stream_user_message_to_llm(
-         self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+         self, user_message: LLMUserMessage, **kwargs
      ) -> AsyncGenerator[ChunkResponse, None]:
-         """
-         Streams the response from an OpenAI-compatible API.
-         Yields reasoning and content in separate chunks.
-         """
-         content = []
-
-         if user_message:
-             content.append({"type": "text", "text": user_message})
-
-         if image_urls:
-             for image_url in image_urls:
-                 try:
-                     image_content = process_image(image_url)
-                     content.append(image_content)
-                     logger.info(f"Processed image for streaming: {image_url}")
-                 except ValueError as e:
-                     logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                     continue
+         self.add_user_message(user_message)

-         self.add_user_message(content)
-         logger.debug(f"Prepared streaming message content: {content}")
-
-         # Initialize variables to track reasoning and main content
          accumulated_reasoning = ""
          accumulated_content = ""

          try:
+             formatted_messages = await _format_openai_history(self.messages)
              logger.info(f"Starting streaming request to {self.model.provider.value} API")
-             stream = self.client.chat.completions.create(
-                 model=self.model.value,
-                 messages=[msg.to_dict() for msg in self.messages],
-                 max_tokens=self.max_tokens,
-                 stream=True,
-                 stream_options={"include_usage": True}
-             )
+
+             params: Dict[str, Any] = {
+                 "model": self.model.value,
+                 "messages": formatted_messages,
+                 "stream": True,
+                 "stream_options": {"include_usage": True},
+             }
+
+             if self.config.uses_max_completion_tokens:
+                 params["max_completion_tokens"] = self.max_tokens
+             else:
+                 params["max_tokens"] = self.max_tokens
+
+             stream = self.client.chat.completions.create(**params)

              for chunk in stream:
                  chunk: ChatCompletionChunk

@@ -170,25 +173,23 @@

                  delta = chunk.choices[0].delta

-                 # Process reasoning tokens (if supported by model)
-                 reasoning_chunk = getattr(delta, "reasoning_content", None)
+                 # --- PRESERVED ORIGINAL LOGIC (adapted for streaming) ---
+                 reasoning_chunk = None
+                 if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                     reasoning_chunk = delta.reasoning_content
+                 elif isinstance(delta, dict) and "reasoning_content" in delta and delta["reasoning_content"]:
+                     reasoning_chunk = delta["reasoning_content"]
+
                  if reasoning_chunk:
                      accumulated_reasoning += reasoning_chunk
-                     yield ChunkResponse(
-                         content="",
-                         reasoning=reasoning_chunk
-                     )
+                     yield ChunkResponse(content="", reasoning=reasoning_chunk)
+                 # --- END PRESERVED LOGIC ---

-                 # Process main content tokens
                  main_token = delta.content
                  if main_token:
                      accumulated_content += main_token
-                     yield ChunkResponse(
-                         content=main_token,
-                         reasoning=None
-                     )
+                     yield ChunkResponse(content=main_token, reasoning=None)

-                 # Yield token usage if available in the final chunk
                  if hasattr(chunk, "usage") and chunk.usage is not None:
                      token_usage = self._create_token_usage(chunk.usage)
                      yield ChunkResponse(

@@ -198,7 +199,6 @@
                          usage=token_usage
                      )

-             # After streaming, add the fully accumulated assistant message to history
              self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
              logger.info(f"Completed streaming response from {self.model.provider.value} API")

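The new `uses_max_completion_tokens` flag on `LLMConfig` decides whether the request carries `max_tokens` or the newer `max_completion_tokens` parameter that some OpenAI models require. A hedged, self-contained sketch of the same parameter-selection pattern written directly against the `openai` SDK; the helper name and model name below are illustrative, not part of autobyteus.

# Sketch of the max_tokens / max_completion_tokens switch introduced by this hunk.
from typing import Any, Dict
from openai import OpenAI


def build_completion_params(model: str, messages: list, limit: int,
                            uses_max_completion_tokens: bool) -> Dict[str, Any]:
    """Mirror of the diff's branching: newer models reject the legacy max_tokens parameter."""
    params: Dict[str, Any] = {"model": model, "messages": messages}
    if uses_max_completion_tokens:
        params["max_completion_tokens"] = limit
    else:
        params["max_tokens"] = limit
    return params


client = OpenAI()  # reads OPENAI_API_KEY from the environment
params = build_completion_params(
    model="gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "Hello"}],
    limit=256,
    uses_max_completion_tokens=False,
)
response = client.chat.completions.create(**params)
print(response.choices[0].message.content)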
autobyteus/llm/autobyteus_provider.py

@@ -49,7 +49,7 @@ class AutobyteusModelProvider:
          try:
              # Instantiate client for this specific host
              client = AutobyteusClient(server_url=host_url)
-             response = client.get_available_models_sync()
+             response = client.get_available_llm_models_sync()
          except Exception as e:
              logger.warning(f"Could not connect or fetch models from Autobyteus server at {host_url}: {e}")
              continue