autobyteus 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- autobyteus/agent/context/agent_config.py +6 -1
- autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
- autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
- autobyteus/agent/message/__init__.py +7 -5
- autobyteus/agent/message/agent_input_user_message.py +6 -16
- autobyteus/agent/message/context_file.py +24 -24
- autobyteus/agent/message/context_file_type.py +29 -8
- autobyteus/agent/message/multimodal_message_builder.py +47 -0
- autobyteus/agent/streaming/stream_event_payloads.py +23 -4
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
- autobyteus/agent/tool_invocation.py +2 -1
- autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
- autobyteus/agent_team/context/agent_team_config.py +1 -0
- autobyteus/llm/api/autobyteus_llm.py +33 -33
- autobyteus/llm/api/bedrock_llm.py +13 -5
- autobyteus/llm/api/claude_llm.py +13 -27
- autobyteus/llm/api/gemini_llm.py +108 -42
- autobyteus/llm/api/groq_llm.py +4 -3
- autobyteus/llm/api/mistral_llm.py +97 -51
- autobyteus/llm/api/nvidia_llm.py +6 -5
- autobyteus/llm/api/ollama_llm.py +37 -12
- autobyteus/llm/api/openai_compatible_llm.py +91 -91
- autobyteus/llm/autobyteus_provider.py +1 -1
- autobyteus/llm/base_llm.py +42 -139
- autobyteus/llm/extensions/base_extension.py +6 -6
- autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
- autobyteus/llm/llm_factory.py +106 -4
- autobyteus/llm/token_counter/token_counter_factory.py +1 -1
- autobyteus/llm/user_message.py +43 -35
- autobyteus/llm/utils/llm_config.py +34 -18
- autobyteus/llm/utils/media_payload_formatter.py +99 -0
- autobyteus/llm/utils/messages.py +32 -25
- autobyteus/llm/utils/response_types.py +9 -3
- autobyteus/llm/utils/token_usage.py +6 -5
- autobyteus/multimedia/__init__.py +31 -0
- autobyteus/multimedia/audio/__init__.py +11 -0
- autobyteus/multimedia/audio/api/__init__.py +4 -0
- autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
- autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
- autobyteus/multimedia/audio/audio_client_factory.py +120 -0
- autobyteus/multimedia/audio/audio_model.py +96 -0
- autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
- autobyteus/multimedia/audio/base_audio_client.py +40 -0
- autobyteus/multimedia/image/__init__.py +11 -0
- autobyteus/multimedia/image/api/__init__.py +9 -0
- autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
- autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
- autobyteus/multimedia/image/api/openai_image_client.py +142 -0
- autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
- autobyteus/multimedia/image/base_image_client.py +67 -0
- autobyteus/multimedia/image/image_client_factory.py +118 -0
- autobyteus/multimedia/image/image_model.py +96 -0
- autobyteus/multimedia/providers.py +5 -0
- autobyteus/multimedia/runtimes.py +8 -0
- autobyteus/multimedia/utils/__init__.py +10 -0
- autobyteus/multimedia/utils/api_utils.py +19 -0
- autobyteus/multimedia/utils/multimedia_config.py +29 -0
- autobyteus/multimedia/utils/response_types.py +13 -0
- autobyteus/tools/__init__.py +3 -0
- autobyteus/tools/multimedia/__init__.py +8 -0
- autobyteus/tools/multimedia/audio_tools.py +116 -0
- autobyteus/tools/multimedia/image_tools.py +186 -0
- autobyteus/tools/tool_category.py +1 -0
- autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
- autobyteus/tools/usage/providers/tool_manifest_provider.py +5 -3
- autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
- autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/METADATA +9 -9
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/RECORD +73 -45
- examples/run_browser_agent.py +1 -1
- autobyteus/llm/utils/image_payload_formatter.py +0 -89
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.5.dist-info → autobyteus-1.1.6.dist-info}/top_level.txt +0 -0
autobyteus/llm/api/mistral_llm.py
CHANGED

@@ -1,45 +1,91 @@
-from typing import Dict, Optional, List, AsyncGenerator
+from typing import Dict, Optional, List, Any, AsyncGenerator, Union
 import os
 import logging
+import httpx
+import asyncio
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.base_llm import BaseLLM
 from mistralai import Mistral
-from autobyteus.llm.utils.messages import
+from autobyteus.llm.utils.messages import Message, MessageRole
 from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, get_mime_type, is_valid_image_path

-# Configure logger
 logger = logging.getLogger(__name__)

+async def _format_mistral_messages(messages: List[Message]) -> List[Dict[str, Any]]:
+    """Formats a list of internal Message objects into a list of dictionaries for the Mistral API."""
+    mistral_messages = []
+    for msg in messages:
+        # Skip empty messages from non-system roles as Mistral API may reject them
+        if not msg.content and not msg.image_urls and msg.role != MessageRole.SYSTEM:
+            continue
+
+        content: Union[str, List[Dict[str, Any]]]
+
+        if msg.image_urls:
+            content_parts: List[Dict[str, Any]] = []
+            if msg.content:
+                content_parts.append({"type": "text", "text": msg.content})
+
+            image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+            try:
+                base64_images = await asyncio.gather(*image_tasks)
+                for i, b64_image in enumerate(base64_images):
+                    original_url = msg.image_urls[i]
+                    mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                    data_uri = f"data:{mime_type};base64,{b64_image}"
+
+                    # Mistral's format for image parts
+                    content_parts.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": data_uri
+                        }
+                    })
+            except Exception as e:
+                logger.error(f"Error processing images for Mistral: {e}")
+
+            if msg.audio_urls:
+                logger.warning("MistralLLM does not yet support audio; skipping.")
+            if msg.video_urls:
+                logger.warning("MistralLLM does not yet support video; skipping.")
+
+            content = content_parts
+        else:
+            content = msg.content or ""
+
+        mistral_messages.append({"role": msg.role.value, "content": content})
+
+    return mistral_messages
+
+
 class MistralLLM(BaseLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        # Provide defaults if not specified
         if model is None:
-            model = LLMModel
+            model = LLMModel['mistral-large']
         if llm_config is None:
             llm_config = LLMConfig()

         super().__init__(model=model, llm_config=llm_config)
-        self.
+        self.http_client = httpx.AsyncClient()
+        self.client: Mistral = self._initialize()
         logger.info(f"MistralLLM initialized with model: {self.model}")

-
-    def initialize(cls):
+    def _initialize(self) -> Mistral:
         mistral_api_key = os.environ.get("MISTRAL_API_KEY")
         if not mistral_api_key:
             logger.error("MISTRAL_API_KEY environment variable is not set")
-            raise ValueError(
-                "MISTRAL_API_KEY environment variable is not set. "
-                "Please set this variable in your environment."
-            )
+            raise ValueError("MISTRAL_API_KEY environment variable is not set.")
         try:
-            return Mistral(api_key=mistral_api_key)
+            return Mistral(api_key=mistral_api_key, client=self.http_client)
         except Exception as e:
             logger.error(f"Failed to initialize Mistral client: {str(e)}")
             raise ValueError(f"Failed to initialize Mistral client: {str(e)}")

-    def _create_token_usage(self, usage_data:
+    def _create_token_usage(self, usage_data: Any) -> TokenUsage:
         """Convert Mistral usage data to TokenUsage format."""
         return TokenUsage(
             prompt_tokens=usage_data.prompt_tokens,
@@ -48,26 +94,26 @@ class MistralLLM(BaseLLM):
         )

     async def _send_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> CompleteResponse:
         self.add_user_message(user_message)
-
+
         try:
-            mistral_messages =
+            mistral_messages = await _format_mistral_messages(self.messages)

-            chat_response = self.client.chat.
+            chat_response = await self.client.chat.complete_async(
                 model=self.model.value,
                 messages=mistral_messages,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                top_p=self.config.top_p,
             )

-            assistant_message = chat_response.choices.message.content
+            assistant_message = chat_response.choices[0].message.content
             self.add_assistant_message(assistant_message)

-
-
-            if hasattr(chat_response, 'usage') and chat_response.usage:
-                token_usage = self._create_token_usage(chat_response.usage)
-                logger.debug(f"Token usage recorded: {token_usage}")
+            token_usage = self._create_token_usage(chat_response.usage)
+            logger.debug(f"Token usage recorded: {token_usage}")

             return CompleteResponse(
                 content=assistant_message,
@@ -78,48 +124,48 @@ class MistralLLM(BaseLLM):
             raise ValueError(f"Error in Mistral API call: {str(e)}")

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)

+        accumulated_message = ""
+        final_usage = None
+
         try:
-            mistral_messages =
-
-            stream =
+            mistral_messages = await _format_mistral_messages(self.messages)
+
+            stream = self.client.chat.stream_async(
                 model=self.model.value,
                 messages=mistral_messages,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                top_p=self.config.top_p,
             )

-            accumulated_message = ""
-
             async for chunk in stream:
-                if chunk.
-                    token = chunk.
+                if chunk.choices and chunk.choices[0].delta.content is not None:
+                    token = chunk.choices[0].delta.content
                     accumulated_message += token

-
-
-
-
-
-
-
-
-
-
-
-
-                        usage=token_usage
-                    )
-
-            # After streaming is complete, store the full message
+                    yield ChunkResponse(content=token, is_complete=False)
+
+                if hasattr(chunk, 'usage') and chunk.usage:
+                    final_usage = self._create_token_usage(chunk.usage)
+
+            # Yield the final chunk with usage data
+            yield ChunkResponse(
+                content="",
+                is_complete=True,
+                usage=final_usage
+            )
+
             self.add_assistant_message(accumulated_message)
         except Exception as e:
             logger.error(f"Error in Mistral API streaming call: {str(e)}")
             raise ValueError(f"Error in Mistral API streaming call: {str(e)}")

     async def cleanup(self):
-        # Clean up any resources if needed
         logger.debug("Cleaning up MistralLLM instance")
-        self.
-
+        if self.http_client and not self.http_client.is_closed:
+            await self.http_client.aclose()
+        await super().cleanup()
autobyteus/llm/api/nvidia_llm.py
CHANGED

@@ -8,6 +8,7 @@ from autobyteus.llm.utils.llm_config import LLMConfig
 from autobyteus.llm.utils.messages import MessageRole, Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage

 logger = logging.getLogger(__name__)

@@ -38,11 +39,11 @@ class NvidiaLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Failed to initialize Nvidia client: {str(e)}")

-    async def _send_user_message_to_llm(self, user_message:
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
         try:
             completion = self.client.chat.completions.create(
-                model=self.model,
+                model=self.model.value,
                 messages=[msg.to_dict() for msg in self.messages],
                 temperature=0,
                 top_p=1,
@@ -65,12 +66,12 @@ class NvidiaLLM(BaseLLM):
         except Exception as e:
             raise ValueError(f"Error in Nvidia API call: {str(e)}")

-    async def
+    async def _stream_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
         complete_response = ""
         try:
             completion = self.client.chat.completions.create(
-                model=self.model,
+                model=self.model.value,
                 messages=[msg.to_dict() for msg in self.messages],
                 temperature=0,
                 top_p=1,
@@ -104,4 +105,4 @@ class NvidiaLLM(BaseLLM):
             raise ValueError(f"Error in Nvidia API streaming call: {str(e)}")

     async def cleanup(self):
-        super().cleanup()
+        await super().cleanup()
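
The `cleanup` fix is the usual async pitfall: calling `super().cleanup()` without `await` only creates a coroutine object, so the parent's teardown never runs. A minimal sketch with illustrative class names (not autobyteus APIs):

```python
import asyncio


class BaseClient:
    async def cleanup(self) -> None:
        # Pretend to release network resources
        await asyncio.sleep(0)
        print("base cleanup ran")


class BrokenClient(BaseClient):
    async def cleanup(self) -> None:
        # Bug: builds a coroutine but never runs it (RuntimeWarning: never awaited)
        super().cleanup()


class FixedClient(BaseClient):
    async def cleanup(self) -> None:
        # Fix mirrored in this release: await the parent coroutine
        await super().cleanup()


async def main() -> None:
    await BrokenClient().cleanup()   # base cleanup is silently skipped
    await FixedClient().cleanup()    # prints "base cleanup ran"


asyncio.run(main())
```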
autobyteus/llm/api/ollama_llm.py
CHANGED

@@ -1,21 +1,22 @@
-from typing import Dict, Optional, List, AsyncGenerator
+from typing import Dict, Optional, List, AsyncGenerator, Any
 from ollama import AsyncClient, ChatResponse, ResponseError
+from ollama import Image  # FIX: Import the Image type from the ollama library
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.base_llm import BaseLLM
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import
+from autobyteus.llm.utils.messages import Message
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64
 import logging
 import asyncio
 import httpx
-import os

 logger = logging.getLogger(__name__)

 class OllamaLLM(BaseLLM):
     def __init__(self, model: LLMModel, llm_config: LLMConfig):
-        # The host URL is now passed via the model object, decoupling from environment variables here.
         if not model.host_url:
             raise ValueError("OllamaLLM requires a host_url to be set in its LLMModel object.")

@@ -26,16 +27,41 @@ class OllamaLLM(BaseLLM):
         super().__init__(model=model, llm_config=llm_config)
         logger.info(f"OllamaLLM initialized with model: {self.model.model_identifier}")

-    async def
+    async def _format_ollama_messages(self) -> List[Dict[str, Any]]:
+        """
+        Formats the conversation history for the Ollama API, including multimodal content.
+        """
+        formatted_messages = []
+        for msg in self.messages:
+            msg_dict = {
+                "role": msg.role.value,
+                "content": msg.content or ""
+            }
+            if msg.image_urls:
+                try:
+                    # Concurrently process all images using the centralized utility
+                    image_tasks = [image_source_to_base64(url) for url in msg.image_urls]
+                    prepared_base64_images = await asyncio.gather(*image_tasks)
+                    if prepared_base64_images:
+                        # FIX: Wrap each base64 string in the official ollama.Image object
+                        msg_dict["images"] = [Image(value=b64_string) for b64_string in prepared_base64_images]
+                except Exception as e:
+                    logger.error(f"Error processing images for Ollama, skipping them. Error: {e}")
+
+            formatted_messages.append(msg_dict)
+        return formatted_messages
+
+    async def _send_user_message_to_llm(self, user_message: LLMUserMessage, **kwargs) -> CompleteResponse:
         self.add_user_message(user_message)
+
         try:
+            formatted_messages = await self._format_ollama_messages()
             response: ChatResponse = await self.client.chat(
                 model=self.model.value,
-                messages=
+                messages=formatted_messages
             )
             assistant_message = response['message']['content']

-            # Detect and process reasoning content using <think> markers
             reasoning_content = None
             main_content = assistant_message
             if "<think>" in assistant_message and "</think>" in assistant_message:
@@ -69,7 +95,7 @@ class OllamaLLM(BaseLLM):
             raise

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
         self.add_user_message(user_message)
         accumulated_main = ""
@@ -78,17 +104,16 @@ class OllamaLLM(BaseLLM):
         final_response = None

         try:
+            formatted_messages = await self._format_ollama_messages()
             async for part in await self.client.chat(
                 model=self.model.value,
-                messages=
+                messages=formatted_messages,
                 stream=True
             ):
                 token = part['message']['content']

-                # Simple state machine for <think> tags
                 if "<think>" in token:
                     in_reasoning = True
-                    # In case token is like "...</think><think>...", handle it
                     parts = token.split("<think>")
                     token = parts[-1]

@@ -130,4 +155,4 @@ class OllamaLLM(BaseLLM):
             raise

     async def cleanup(self):
-        await super().cleanup()
+        await super().cleanup()
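
As a rough illustration of what the new `_format_ollama_messages` helper does, the sketch below converts local image files to base64 concurrently and attaches them under the `images` key of an Ollama-style chat message. `image_source_to_base64` here is a simplified stand-in for the package's `media_payload_formatter` utility, and the real code additionally wraps each string in `ollama.Image(value=...)` as the diff shows.

```python
import asyncio
import base64
from pathlib import Path
from typing import Any, Dict, List


async def image_source_to_base64(path: str) -> str:
    # Stand-in: read a local file off the event loop and return its base64 text
    data = await asyncio.to_thread(Path(path).read_bytes)
    return base64.b64encode(data).decode()


async def build_ollama_message(role: str, content: str, image_paths: List[str]) -> Dict[str, Any]:
    msg: Dict[str, Any] = {"role": role, "content": content}
    if image_paths:
        # Convert all images concurrently, mirroring _format_ollama_messages
        msg["images"] = await asyncio.gather(*(image_source_to_base64(p) for p in image_paths))
    return msg


if __name__ == "__main__":
    print(asyncio.run(build_ollama_message("user", "Describe these photos.", [])))
```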
autobyteus/llm/api/openai_compatible_llm.py
CHANGED

@@ -1,20 +1,62 @@
 import logging
 import os
 from abc import ABC
-from typing import Optional, List, AsyncGenerator
+from typing import Optional, List, AsyncGenerator, Dict, Any
 from openai import OpenAI
 from openai.types.completion_usage import CompletionUsage
 from openai.types.chat import ChatCompletionChunk
+import asyncio

 from autobyteus.llm.base_llm import BaseLLM
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.
+from autobyteus.llm.utils.media_payload_formatter import image_source_to_base64, create_data_uri, get_mime_type, is_valid_image_path
 from autobyteus.llm.utils.token_usage import TokenUsage
 from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.user_message import LLMUserMessage
+from autobyteus.llm.utils.messages import Message

 logger = logging.getLogger(__name__)

+async def _format_openai_history(messages: List[Message]) -> List[Dict[str, Any]]:
+    """A local async function to format history for the OpenAI SDK, handling image processing."""
+    formatted_messages = []
+    for msg in messages:
+        # For multimodal messages, build the content list of parts
+        if msg.image_urls or msg.audio_urls or msg.video_urls:
+            content_parts: List[Dict[str, Any]] = []
+            if msg.content:
+                content_parts.append({"type": "text", "text": msg.content})
+
+            image_tasks = []
+            if msg.image_urls:
+                for url in msg.image_urls:
+                    # Create an async task for each image to process them concurrently
+                    image_tasks.append(image_source_to_base64(url))
+
+            try:
+                base64_images = await asyncio.gather(*image_tasks)
+                for i, b64_image in enumerate(base64_images):
+                    original_url = msg.image_urls[i]
+                    # Determine mime type from original path if possible, otherwise default
+                    mime_type = get_mime_type(original_url) if is_valid_image_path(original_url) else "image/jpeg"
+                    content_parts.append(create_data_uri(mime_type, b64_image))
+            except Exception as e:
+                logger.error(f"Error processing one or more images: {e}")
+
+            # Placeholder for future audio/video processing
+            if msg.audio_urls:
+                logger.warning("OpenAI compatible layer does not yet support audio; skipping.")
+            if msg.video_urls:
+                logger.warning("OpenAI compatible layer does not yet support video; skipping.")
+
+            formatted_messages.append({"role": msg.role.value, "content": content_parts})
+        else:
+            # For text-only messages, use the simple string format
+            formatted_messages.append({"role": msg.role.value, "content": msg.content})
+    return formatted_messages
+
+
 class OpenAICompatibleLLM(BaseLLM, ABC):
     def __init__(
         self,
@@ -24,18 +66,6 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         base_url: str,
         api_key_default: Optional[str] = None
     ):
-        """
-        Initializes an OpenAI-compatible LLM.
-
-        Args:
-            model (LLMModel): The model to use.
-            llm_config (LLMConfig): Configuration for the LLM.
-            api_key_env_var (str): The name of the environment variable for the API key.
-            base_url (str): The base URL for the API.
-            api_key_default (Optional[str], optional): A default API key to use if the
-                environment variable is not set.
-                Defaults to None.
-        """
         api_key = os.getenv(api_key_env_var)
         if not api_key:
             if api_key_default:
@@ -49,13 +79,11 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         logger.info(f"Initialized OpenAI compatible client with base_url: {base_url}")

         super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
+        self.max_tokens = 8000

     def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
         if not usage_data:
             return None
-
         return TokenUsage(
             prompt_tokens=usage_data.prompt_tokens,
             completion_tokens=usage_data.completion_tokens,
@@ -63,53 +91,41 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
         )

     async def _send_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> CompleteResponse:
-
-
-        Supports optional reasoning content if provided in the response.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
+        self.add_user_message(user_message)
+
         try:
+            formatted_messages = await _format_openai_history(self.messages)
             logger.info(f"Sending request to {self.model.provider.value} API")
-
-
-
-
-
+
+            params: Dict[str, Any] = {
+                "model": self.model.value,
+                "messages": formatted_messages,
+            }
+
+            if self.config.uses_max_completion_tokens:
+                params["max_completion_tokens"] = self.max_tokens
+            else:
+                params["max_tokens"] = self.max_tokens
+
+            response = self.client.chat.completions.create(**params)
             full_message = response.choices[0].message

-            #
+            # --- PRESERVED ORIGINAL LOGIC ---
             reasoning = None
             if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
                 reasoning = full_message.reasoning_content
             elif "reasoning_content" in full_message and full_message["reasoning_content"]:
                 reasoning = full_message["reasoning_content"]

-            # Extract main content
             main_content = ""
             if hasattr(full_message, "content") and full_message.content:
                 main_content = full_message.content
             elif "content" in full_message and full_message["content"]:
                 main_content = full_message["content"]
-
+            # --- END PRESERVED LOGIC ---
+
             self.add_assistant_message(main_content, reasoning_content=reasoning)

             token_usage = self._create_token_usage(response.usage)
@@ -125,43 +141,30 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
             raise ValueError(f"Error in {self.model.provider.value} API request: {str(e)}")

     async def _stream_user_message_to_llm(
-        self, user_message:
+        self, user_message: LLMUserMessage, **kwargs
     ) -> AsyncGenerator[ChunkResponse, None]:
-
-        Streams the response from an OpenAI-compatible API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
+        self.add_user_message(user_message)

-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
         accumulated_reasoning = ""
         accumulated_content = ""

         try:
+            formatted_messages = await _format_openai_history(self.messages)
             logger.info(f"Starting streaming request to {self.model.provider.value} API")
-
-
-
-
-            stream
-            stream_options
-
+
+            params: Dict[str, Any] = {
+                "model": self.model.value,
+                "messages": formatted_messages,
+                "stream": True,
+                "stream_options": {"include_usage": True},
+            }
+
+            if self.config.uses_max_completion_tokens:
+                params["max_completion_tokens"] = self.max_tokens
+            else:
+                params["max_tokens"] = self.max_tokens
+
+            stream = self.client.chat.completions.create(**params)

             for chunk in stream:
                 chunk: ChatCompletionChunk
@@ -170,25 +173,23 @@ class OpenAICompatibleLLM(BaseLLM, ABC):

                 delta = chunk.choices[0].delta

-                #
-                reasoning_chunk =
+                # --- PRESERVED ORIGINAL LOGIC (adapted for streaming) ---
+                reasoning_chunk = None
+                if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                    reasoning_chunk = delta.reasoning_content
+                elif isinstance(delta, dict) and "reasoning_content" in delta and delta["reasoning_content"]:
+                    reasoning_chunk = delta["reasoning_content"]
+
                 if reasoning_chunk:
                     accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-
-                        reasoning=reasoning_chunk
-                    )
+                    yield ChunkResponse(content="", reasoning=reasoning_chunk)
+                # --- END PRESERVED LOGIC ---

-                # Process main content tokens
                 main_token = delta.content
                 if main_token:
                     accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
+                    yield ChunkResponse(content=main_token, reasoning=None)

-                # Yield token usage if available in the final chunk
                 if hasattr(chunk, "usage") and chunk.usage is not None:
                     token_usage = self._create_token_usage(chunk.usage)
                     yield ChunkResponse(
@@ -198,7 +199,6 @@ class OpenAICompatibleLLM(BaseLLM, ABC):
                         usage=token_usage
                     )

-            # After streaming, add the fully accumulated assistant message to history
             self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
             logger.info(f"Completed streaming response from {self.model.provider.value} API")

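
The new request-building logic in `openai_compatible_llm.py` switches between the `max_tokens` and `max_completion_tokens` parameters based on a `uses_max_completion_tokens` config flag, since some newer OpenAI endpoints (the reasoning models) accept only the latter. A standalone sketch of that branching, using an illustrative helper name rather than the package's actual method:

```python
from typing import Any, Dict, List


def build_chat_params(model: str, messages: List[Dict[str, Any]], max_tokens: int,
                      uses_max_completion_tokens: bool, stream: bool = False) -> Dict[str, Any]:
    """Choose the token-limit keyword the target endpoint expects."""
    params: Dict[str, Any] = {"model": model, "messages": messages}
    if stream:
        params["stream"] = True
        params["stream_options"] = {"include_usage": True}
    # Reasoning-model endpoints reject max_tokens and require max_completion_tokens
    if uses_max_completion_tokens:
        params["max_completion_tokens"] = max_tokens
    else:
        params["max_tokens"] = max_tokens
    return params


if __name__ == "__main__":
    msgs = [{"role": "user", "content": "hello"}]
    print(build_chat_params("gpt-4o-mini", msgs, 8000, uses_max_completion_tokens=False))
    print(build_chat_params("o1-mini", msgs, 8000, uses_max_completion_tokens=True, stream=True))
```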
autobyteus/llm/autobyteus_provider.py
CHANGED

@@ -49,7 +49,7 @@ class AutobyteusModelProvider:
         try:
             # Instantiate client for this specific host
             client = AutobyteusClient(server_url=host_url)
-            response = client.
+            response = client.get_available_llm_models_sync()
         except Exception as e:
             logger.warning(f"Could not connect or fetch models from Autobyteus server at {host_url}: {e}")
             continue