autobyteus 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (49)
  1. autobyteus/agent/bootstrap_steps/__init__.py +2 -0
  2. autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +2 -0
  3. autobyteus/agent/bootstrap_steps/mcp_server_prewarming_step.py +71 -0
  4. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +41 -12
  5. autobyteus/agent/runtime/agent_runtime.py +1 -4
  6. autobyteus/agent/runtime/agent_worker.py +56 -23
  7. autobyteus/agent/shutdown_steps/__init__.py +17 -0
  8. autobyteus/agent/shutdown_steps/agent_shutdown_orchestrator.py +63 -0
  9. autobyteus/agent/shutdown_steps/base_shutdown_step.py +33 -0
  10. autobyteus/agent/shutdown_steps/llm_instance_cleanup_step.py +45 -0
  11. autobyteus/agent/shutdown_steps/mcp_server_cleanup_step.py +32 -0
  12. autobyteus/llm/api/deepseek_llm.py +10 -172
  13. autobyteus/llm/api/grok_llm.py +10 -171
  14. autobyteus/llm/api/kimi_llm.py +24 -0
  15. autobyteus/llm/api/openai_compatible_llm.py +193 -0
  16. autobyteus/llm/api/openai_llm.py +11 -139
  17. autobyteus/llm/llm_factory.py +62 -0
  18. autobyteus/llm/providers.py +1 -0
  19. autobyteus/llm/token_counter/kimi_token_counter.py +24 -0
  20. autobyteus/llm/token_counter/token_counter_factory.py +3 -0
  21. autobyteus/llm/utils/messages.py +3 -3
  22. autobyteus/tools/base_tool.py +2 -0
  23. autobyteus/tools/mcp/__init__.py +10 -7
  24. autobyteus/tools/mcp/call_handlers/__init__.py +0 -2
  25. autobyteus/tools/mcp/config_service.py +1 -6
  26. autobyteus/tools/mcp/factory.py +12 -26
  27. autobyteus/tools/mcp/registrar.py +57 -178
  28. autobyteus/tools/mcp/server/__init__.py +16 -0
  29. autobyteus/tools/mcp/server/base_managed_mcp_server.py +139 -0
  30. autobyteus/tools/mcp/server/http_managed_mcp_server.py +29 -0
  31. autobyteus/tools/mcp/server/proxy.py +36 -0
  32. autobyteus/tools/mcp/server/stdio_managed_mcp_server.py +33 -0
  33. autobyteus/tools/mcp/server_instance_manager.py +93 -0
  34. autobyteus/tools/mcp/tool.py +28 -46
  35. autobyteus/tools/mcp/tool_registrar.py +177 -0
  36. autobyteus/tools/mcp/types.py +10 -21
  37. autobyteus/tools/registry/tool_definition.py +11 -2
  38. autobyteus/tools/registry/tool_registry.py +27 -28
  39. autobyteus/tools/usage/parsers/_json_extractor.py +99 -0
  40. autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +46 -77
  41. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +87 -97
  42. autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +38 -46
  43. autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +104 -154
  44. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/METADATA +4 -2
  45. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/RECORD +48 -32
  46. autobyteus/tools/mcp/call_handlers/sse_handler.py +0 -22
  47. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/WHEEL +0 -0
  48. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/licenses/LICENSE +0 -0
  49. {autobyteus-1.1.1.dist-info → autobyteus-1.1.3.dist-info}/top_level.txt +0 -0
autobyteus/llm/api/deepseek_llm.py
@@ -1,188 +1,26 @@
 import logging
-import os
-from typing import Optional, List, AsyncGenerator
-from openai import OpenAI
-from openai.types.completion_usage import CompletionUsage
-from openai.types.chat import ChatCompletionChunk
-from autobyteus.llm.base_llm import BaseLLM
+from typing import Optional
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import MessageRole
-from autobyteus.llm.utils.image_payload_formatter import process_image
-from autobyteus.llm.utils.token_usage import TokenUsage
-from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
 
 logger = logging.getLogger(__name__)
 
-class DeepSeekLLM(BaseLLM):
+class DeepSeekLLM(OpenAICompatibleLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
-        if not deepseek_api_key:
-            logger.error("DEEPSEEK_API_KEY environment variable is not set.")
-            raise ValueError("DEEPSEEK_API_KEY environment variable is not set.")
-
-        self.client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
-        logger.info("DeepSeek API key and base URL set successfully")
-
         # Provide defaults if not specified
         if model is None:
-            model = LLMModel.deepseek_chat
+            model = LLMModel['deepseek-chat']
         if llm_config is None:
             llm_config = LLMConfig()
 
-        super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
-
-    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
-        if not usage_data:
-            return None
-
-        return TokenUsage(
-            prompt_tokens=usage_data.prompt_tokens,
-            completion_tokens=usage_data.completion_tokens,
-            total_tokens=usage_data.total_tokens
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="DEEPSEEK_API_KEY",
+            base_url="https://api.deepseek.com"
         )
+        logger.info(f"DeepSeekLLM initialized with model: {self.model}")
 
-    async def _send_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> CompleteResponse:
-        """
-        Sends a non-streaming request to the DeepSeek API.
-        Supports optional reasoning content if provided in the response.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
-        try:
-            logger.info("Sending request to DeepSeek API")
-            response = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-            )
-            full_message = response.choices.message
-
-            # Extract reasoning_content if present
-            reasoning = None
-            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
-                reasoning = full_message.reasoning_content
-            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
-                reasoning = full_message["reasoning_content"]
-
-            # Extract main content
-            main_content = ""
-            if hasattr(full_message, "content") and full_message.content:
-                main_content = full_message.content
-            elif "content" in full_message and full_message["content"]:
-                main_content = full_message["content"]
-
-            self.add_assistant_message(main_content, reasoning_content=reasoning)
-
-            token_usage = self._create_token_usage(response.usage)
-            logger.info("Received response from DeepSeek API with usage data")
-
-            return CompleteResponse(
-                content=main_content,
-                reasoning=reasoning,
-                usage=token_usage
-            )
-        except Exception as e:
-            logger.error(f"Error in DeepSeek API request: {str(e)}")
-            raise ValueError(f"Error in DeepSeek API request: {str(e)}")
-
-    async def _stream_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> AsyncGenerator[ChunkResponse, None]:
-        """
-        Streams the response from the DeepSeek API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
-        accumulated_reasoning = ""
-        accumulated_content = ""
-
-        try:
-            logger.info("Starting streaming request to DeepSeek API")
-            stream = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-                stream=True,
-                stream_options={"include_usage": True}
-            )
-
-            for chunk in stream:
-                chunk: ChatCompletionChunk
-
-                # Process reasoning tokens
-                reasoning_chunk = getattr(chunk.choices.delta, "reasoning_content", None)
-                if reasoning_chunk:
-                    accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=reasoning_chunk
-                    )
-
-                # Process main content tokens
-                main_token = chunk.choices.delta.content
-                if main_token:
-                    accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
-
-                # Yield token usage if available in the final chunk
-                if hasattr(chunk, "usage") and chunk.usage is not None:
-                    token_usage = self._create_token_usage(chunk.usage)
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=None,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-
-            # After streaming, add the fully accumulated assistant message to history
-            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
-            logger.info("Completed streaming response from DeepSeek API")
-
-        except Exception as e:
-            logger.error(f"Error in DeepSeek API streaming: {str(e)}")
-            raise ValueError(f"Error in DeepSeek API streaming: {str(e)}")
-
     async def cleanup(self):
         await super().cleanup()
autobyteus/llm/api/grok_llm.py
@@ -1,187 +1,26 @@
 import logging
-import os
-from typing import Optional, List, AsyncGenerator
-from openai import OpenAI
-from openai.types.completion_usage import CompletionUsage
-from openai.types.chat import ChatCompletionChunk
-from autobyteus.llm.base_llm import BaseLLM
+from typing import Optional
 from autobyteus.llm.models import LLMModel
 from autobyteus.llm.utils.llm_config import LLMConfig
-from autobyteus.llm.utils.messages import MessageRole
-from autobyteus.llm.utils.image_payload_formatter import process_image
-from autobyteus.llm.utils.token_usage import TokenUsage
-from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
 
 logger = logging.getLogger(__name__)
 
-class GrokLLM(BaseLLM):
+class GrokLLM(OpenAICompatibleLLM):
     def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
-        grok_api_key = os.getenv("GROK_API_KEY")
-        if not grok_api_key:
-            logger.error("GROK_API_KEY environment variable is not set.")
-            raise ValueError("GROK_API_KEY environment variable is not set.")
-
-        self.client = OpenAI(api_key=grok_api_key, base_url="https://api.x.ai/v1")
-        logger.info("Grok API key and base URL set successfully")
-
        # Provide defaults if not specified
         if model is None:
-            model = LLMModel.grok_2_1212
+            model = LLMModel['grok-2-1212']
         if llm_config is None:
             llm_config = LLMConfig()
 
-        super().__init__(model=model, llm_config=llm_config)
-        self.max_tokens = 8000
-
-    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
-        """Convert usage data to TokenUsage format."""
-        if not usage_data:
-            return None
-
-        return TokenUsage(
-            prompt_tokens=usage_data.prompt_tokens,
-            completion_tokens=usage_data.completion_tokens,
-            total_tokens=usage_data.total_tokens
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="GROK_API_KEY",
+            base_url="https://api.x.ai/v1"
        )
+        logger.info(f"GrokLLM initialized with model: {self.model}")
 
-    async def _send_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> CompleteResponse:
-        """
-        Sends a non-streaming request to the Grok API.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared message content: {content}")
-
-        try:
-            logger.info("Sending request to Grok API")
-            response = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-            )
-            full_message = response.choices.message
-
-            # Extract reasoning_content if present
-            reasoning = None
-            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
-                reasoning = full_message.reasoning_content
-            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
-                reasoning = full_message["reasoning_content"]
-
-            # Extract main content
-            main_content = ""
-            if hasattr(full_message, "content") and full_message.content:
-                main_content = full_message.content
-            elif "content" in full_message and full_message["content"]:
-                main_content = full_message["content"]
-
-            self.add_assistant_message(main_content, reasoning_content=reasoning)
-
-            token_usage = self._create_token_usage(response.usage)
-            logger.info("Received response from Grok API with usage data")
-
-            return CompleteResponse(
-                content=main_content,
-                reasoning=reasoning,
-                usage=token_usage
-            )
-        except Exception as e:
-            logger.error(f"Error in Grok API request: {str(e)}")
-            raise ValueError(f"Error in Grok API request: {str(e)}")
-
-    async def _stream_user_message_to_llm(
-        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
-    ) -> AsyncGenerator[ChunkResponse, None]:
-        """
-        Streams the response from the Grok API.
-        Yields reasoning and content in separate chunks.
-        """
-        content = []
-
-        if user_message:
-            content.append({"type": "text", "text": user_message})
-
-        if image_urls:
-            for image_url in image_urls:
-                try:
-                    image_content = process_image(image_url)
-                    content.append(image_content)
-                    logger.info(f"Processed image for streaming: {image_url}")
-                except ValueError as e:
-                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
-                    continue
-
-        self.add_user_message(content)
-        logger.debug(f"Prepared streaming message content: {content}")
-
-        # Initialize variables to track reasoning and main content
-        accumulated_reasoning = ""
-        accumulated_content = ""
-
-        try:
-            logger.info("Starting streaming request to Grok API")
-            stream = self.client.chat.completions.create(
-                model=self.model.value,
-                messages=[msg.to_dict() for msg in self.messages],
-                max_tokens=self.max_tokens,
-                stream=True,
-                stream_options={"include_usage": True}
-            )
-
-            for chunk in stream:
-                chunk: ChatCompletionChunk
-
-                # Process reasoning tokens
-                reasoning_chunk = getattr(chunk.choices.delta, "reasoning_content", None)
-                if reasoning_chunk:
-                    accumulated_reasoning += reasoning_chunk
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=reasoning_chunk
-                    )
-
-                # Process main content tokens
-                main_token = chunk.choices.delta.content
-                if main_token:
-                    accumulated_content += main_token
-                    yield ChunkResponse(
-                        content=main_token,
-                        reasoning=None
-                    )
-
-                # Yield token usage if available in the final chunk
-                if hasattr(chunk, "usage") and chunk.usage is not None:
-                    token_usage = self._create_token_usage(chunk.usage)
-                    yield ChunkResponse(
-                        content="",
-                        reasoning=None,
-                        is_complete=True,
-                        usage=token_usage
-                    )
-
-            # After streaming, add the fully accumulated assistant message to history
-            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
-            logger.info("Completed streaming response from Grok API")
-
-        except Exception as e:
-            logger.error(f"Error in Grok API streaming: {str(e)}")
-            raise ValueError(f"Error in Grok API streaming: {str(e)}")
-
     async def cleanup(self):
         await super().cleanup()
autobyteus/llm/api/kimi_llm.py
@@ -0,0 +1,24 @@
+import logging
+from typing import Optional
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM
+
+logger = logging.getLogger(__name__)
+
+class KimiLLM(OpenAICompatibleLLM):
+    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
+        # Provide defaults if not specified
+        if model is None:
+            # Setting a default Kimi model from the factory ones
+            model = LLMModel['kimi-latest']
+        if llm_config is None:
+            llm_config = LLMConfig()
+
+        super().__init__(
+            model=model,
+            llm_config=llm_config,
+            api_key_env_var="KIMI_API_KEY",
+            base_url="https://api.moonshot.cn/v1"
+        )
+        logger.info(f"KimiLLM initialized with model: {self.model}")
autobyteus/llm/api/openai_compatible_llm.py
@@ -0,0 +1,193 @@
+import logging
+import os
+from abc import ABC
+from typing import Optional, List, AsyncGenerator
+from openai import OpenAI
+from openai.types.completion_usage import CompletionUsage
+from openai.types.chat import ChatCompletionChunk
+
+from autobyteus.llm.base_llm import BaseLLM
+from autobyteus.llm.models import LLMModel
+from autobyteus.llm.utils.llm_config import LLMConfig
+from autobyteus.llm.utils.image_payload_formatter import process_image
+from autobyteus.llm.utils.token_usage import TokenUsage
+from autobyteus.llm.utils.response_types import CompleteResponse, ChunkResponse
+
+logger = logging.getLogger(__name__)
+
+class OpenAICompatibleLLM(BaseLLM, ABC):
+    def __init__(
+        self,
+        model: LLMModel,
+        llm_config: LLMConfig,
+        api_key_env_var: str,
+        base_url: str
+    ):
+        api_key = os.getenv(api_key_env_var)
+        if not api_key:
+            logger.error(f"{api_key_env_var} environment variable is not set.")
+            raise ValueError(f"{api_key_env_var} environment variable is not set.")
+
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
+        logger.info(f"Initialized OpenAI compatible client with base_url: {base_url}")
+
+        super().__init__(model=model, llm_config=llm_config)
+        self.max_tokens = 8000  # A default, can be overridden by subclass or config
+
+    def _create_token_usage(self, usage_data: Optional[CompletionUsage]) -> Optional[TokenUsage]:
+        """Convert usage data to TokenUsage format."""
+        if not usage_data:
+            return None
+
+        return TokenUsage(
+            prompt_tokens=usage_data.prompt_tokens,
+            completion_tokens=usage_data.completion_tokens,
+            total_tokens=usage_data.total_tokens
+        )
+
+    async def _send_user_message_to_llm(
+        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+    ) -> CompleteResponse:
+        """
+        Sends a non-streaming request to an OpenAI-compatible API.
+        Supports optional reasoning content if provided in the response.
+        """
+        content = []
+
+        if user_message:
+            content.append({"type": "text", "text": user_message})
+
+        if image_urls:
+            for image_url in image_urls:
+                try:
+                    image_content = process_image(image_url)
+                    content.append(image_content)
+                    logger.info(f"Processed image: {image_url}")
+                except ValueError as e:
+                    logger.error(f"Error processing image {image_url}: {str(e)}")
+                    continue
+
+        self.add_user_message(content)
+        logger.debug(f"Prepared message content: {content}")
+
+        try:
+            logger.info(f"Sending request to {self.model.provider.value} API")
+            response = self.client.chat.completions.create(
+                model=self.model.value,
+                messages=[msg.to_dict() for msg in self.messages],
+                max_tokens=self.max_tokens,
+            )
+            full_message = response.choices[0].message
+
+            # Extract reasoning_content if present
+            reasoning = None
+            if hasattr(full_message, "reasoning_content") and full_message.reasoning_content:
+                reasoning = full_message.reasoning_content
+            elif "reasoning_content" in full_message and full_message["reasoning_content"]:
+                reasoning = full_message["reasoning_content"]
+
+            # Extract main content
+            main_content = ""
+            if hasattr(full_message, "content") and full_message.content:
+                main_content = full_message.content
+            elif "content" in full_message and full_message["content"]:
+                main_content = full_message["content"]
+
+            self.add_assistant_message(main_content, reasoning_content=reasoning)
+
+            token_usage = self._create_token_usage(response.usage)
+            logger.info(f"Received response from {self.model.provider.value} API with usage data")
+
+            return CompleteResponse(
+                content=main_content,
+                reasoning=reasoning,
+                usage=token_usage
+            )
+        except Exception as e:
+            logger.error(f"Error in {self.model.provider.value} API request: {str(e)}")
+            raise ValueError(f"Error in {self.model.provider.value} API request: {str(e)}")
+
+    async def _stream_user_message_to_llm(
+        self, user_message: str, image_urls: Optional[List[str]] = None, **kwargs
+    ) -> AsyncGenerator[ChunkResponse, None]:
+        """
+        Streams the response from an OpenAI-compatible API.
+        Yields reasoning and content in separate chunks.
+        """
+        content = []
+
+        if user_message:
+            content.append({"type": "text", "text": user_message})
+
+        if image_urls:
+            for image_url in image_urls:
+                try:
+                    image_content = process_image(image_url)
+                    content.append(image_content)
+                    logger.info(f"Processed image for streaming: {image_url}")
+                except ValueError as e:
+                    logger.error(f"Error processing image for streaming {image_url}: {str(e)}")
+                    continue
+
+        self.add_user_message(content)
+        logger.debug(f"Prepared streaming message content: {content}")
+
+        # Initialize variables to track reasoning and main content
+        accumulated_reasoning = ""
+        accumulated_content = ""
+
+        try:
+            logger.info(f"Starting streaming request to {self.model.provider.value} API")
+            stream = self.client.chat.completions.create(
+                model=self.model.value,
+                messages=[msg.to_dict() for msg in self.messages],
+                max_tokens=self.max_tokens,
+                stream=True,
+                stream_options={"include_usage": True}
+            )
+
+            for chunk in stream:
+                chunk: ChatCompletionChunk
+                if not chunk.choices:
+                    continue
+
+                delta = chunk.choices[0].delta
+
+                # Process reasoning tokens (if supported by model)
+                reasoning_chunk = getattr(delta, "reasoning_content", None)
+                if reasoning_chunk:
+                    accumulated_reasoning += reasoning_chunk
+                    yield ChunkResponse(
+                        content="",
+                        reasoning=reasoning_chunk
+                    )
+
+                # Process main content tokens
+                main_token = delta.content
+                if main_token:
+                    accumulated_content += main_token
+                    yield ChunkResponse(
+                        content=main_token,
+                        reasoning=None
+                    )
+
+                # Yield token usage if available in the final chunk
+                if hasattr(chunk, "usage") and chunk.usage is not None:
+                    token_usage = self._create_token_usage(chunk.usage)
+                    yield ChunkResponse(
+                        content="",
+                        reasoning=None,
+                        is_complete=True,
+                        usage=token_usage
+                    )
+
+            # After streaming, add the fully accumulated assistant message to history
+            self.add_assistant_message(accumulated_content, reasoning_content=accumulated_reasoning)
+            logger.info(f"Completed streaming response from {self.model.provider.value} API")
+
+        except Exception as e:
+            logger.error(f"Error in {self.model.provider.value} API streaming: {str(e)}")
+            raise ValueError(f"Error in {self.model.provider.value} API streaming: {str(e)}")
+
+    async def cleanup(self):
+        await super().cleanup()
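
The hunks above show the core refactor in this release: the duplicated request, streaming, and token-usage logic from DeepSeekLLM and GrokLLM now lives in the shared OpenAICompatibleLLM base class, and each provider class only supplies its default model, API-key environment variable, and base URL. Below is a minimal sketch of what a new provider wrapper could look like under this pattern. It is illustrative only: the provider name ExampleLLM, the EXAMPLE_API_KEY variable, the base URL, and the 'example-chat' model key are hypothetical placeholders and are not defined by the package.

# Hypothetical sketch of a new OpenAI-compatible provider wrapper.
# ExampleLLM, EXAMPLE_API_KEY, the base URL, and 'example-chat' are placeholders,
# not names shipped in autobyteus 1.1.3.
import logging
from autobyteus.llm.models import LLMModel
from autobyteus.llm.utils.llm_config import LLMConfig
from autobyteus.llm.api.openai_compatible_llm import OpenAICompatibleLLM

logger = logging.getLogger(__name__)

class ExampleLLM(OpenAICompatibleLLM):
    def __init__(self, model: LLMModel = None, llm_config: LLMConfig = None):
        # Provide defaults if not specified, mirroring the DeepSeek/Grok/Kimi wrappers
        if model is None:
            model = LLMModel['example-chat']  # assumes such a model key is registered
        if llm_config is None:
            llm_config = LLMConfig()

        # The base class reads the API key from the named environment variable,
        # builds the OpenAI client against base_url, and supplies the shared
        # send/stream/token-usage behaviour shown in the diff above.
        super().__init__(
            model=model,
            llm_config=llm_config,
            api_key_env_var="EXAMPLE_API_KEY",
            base_url="https://api.example.com/v1",
        )
        logger.info(f"ExampleLLM initialized with model: {self.model}")

The DeepSeekLLM, GrokLLM, and new KimiLLM classes in this release follow exactly this shape, differing only in their default model, environment variable, and base URL.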