praisonaiagents 0.0.144__py3-none-any.whl → 0.0.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/__init__.py +71 -7
- praisonaiagents/agent/__init__.py +2 -1
- praisonaiagents/agent/agent.py +358 -48
- praisonaiagents/agent/context_agent.py +2315 -0
- praisonaiagents/agents/agents.py +30 -12
- praisonaiagents/knowledge/knowledge.py +9 -1
- praisonaiagents/llm/__init__.py +40 -14
- praisonaiagents/llm/llm.py +485 -59
- praisonaiagents/llm/openai_client.py +98 -16
- praisonaiagents/memory/memory.py +84 -15
- praisonaiagents/task/task.py +7 -6
- praisonaiagents/telemetry/__init__.py +63 -3
- praisonaiagents/telemetry/integration.py +78 -10
- praisonaiagents/telemetry/performance_cli.py +397 -0
- praisonaiagents/telemetry/performance_monitor.py +573 -0
- praisonaiagents/telemetry/performance_utils.py +571 -0
- praisonaiagents/telemetry/telemetry.py +35 -11
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/METADATA +9 -3
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/RECORD +21 -17
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/top_level.txt +0 -0
praisonaiagents/llm/llm.py
CHANGED
@@ -971,13 +971,39 @@ class LLM:
             # Provider doesn't support streaming with tools, use non-streaming
             use_streaming = False
 
+        # Track whether fallback was successful to avoid duplicate API calls
+        fallback_completed = False
+
         if use_streaming:
             # Streaming approach (with or without tools)
             tool_calls = []
             response_text = ""
+            streaming_success = False
 
-
-
+            # Wrap streaming with error handling for LiteLLM JSON parsing errors
+            try:
+                if verbose:
+                    # Verbose streaming: show display_generating during streaming
+                    with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                        for chunk in litellm.completion(
+                            **self._build_completion_params(
+                                messages=messages,
+                                tools=formatted_tools,
+                                temperature=temperature,
+                                stream=True,
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
+                                **kwargs
+                            )
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta:
+                                delta = chunk.choices[0].delta
+                                response_text, tool_calls = self._process_stream_delta(
+                                    delta, response_text, tool_calls, formatted_tools
+                                )
+                                live.update(display_generating(response_text, current_time))
+                else:
+                    # Non-verbose streaming: no display_generating during streaming
                     for chunk in litellm.completion(
                         **self._build_completion_params(
                             messages=messages,
@@ -994,74 +1020,205 @@ class LLM:
                             response_text, tool_calls = self._process_stream_delta(
                                 delta, response_text, tool_calls, formatted_tools
                             )
-
+                streaming_success = True
+            except Exception as streaming_error:
+                # Handle streaming errors with recovery logic
+                if self._is_streaming_error_recoverable(streaming_error):
+                    if verbose:
+                        logging.warning(f"Streaming error (recoverable): {streaming_error}")
+                        logging.warning("Falling back to non-streaming mode")
+                    # Immediately perform non-streaming fallback with actual API call
+                    try:
+                        if verbose:
+                            # When verbose=True, always use streaming for better UX
+                            with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+                                response_text = ""
+                                # Use streaming when verbose for progressive display
+                                for chunk in litellm.completion(
+                                    **self._build_completion_params(
+                                        messages=messages,
+                                        tools=formatted_tools,
+                                        temperature=temperature,
+                                        stream=True,  # Always stream when verbose=True
+                                        output_json=output_json,
+                                        output_pydantic=output_pydantic,
+                                        **kwargs
+                                    )
+                                ):
+                                    if chunk and chunk.choices and chunk.choices[0].delta:
+                                        delta = chunk.choices[0].delta
+                                        if hasattr(delta, "content") and delta.content:
+                                            response_text += delta.content
+                                            live.update(display_generating(response_text, current_time))
+
+                            # Clear the live display after completion
+                            console.print()
+
+                            # Create final response structure
+                            final_response = {
+                                "choices": [{
+                                    "message": {
+                                        "content": response_text,
+                                        "tool_calls": None
+                                    }
+                                }]
+                            }
+                        else:
+                            # For non-streaming + non-verbose: no display_generating (per user requirements)
+                            final_response = litellm.completion(
+                                **self._build_completion_params(
+                                    messages=messages,
+                                    tools=formatted_tools,
+                                    temperature=temperature,
+                                    stream=False,
+                                    output_json=output_json,
+                                    output_pydantic=output_pydantic,
+                                    **kwargs
+                                )
+                            )
+                            response_text = final_response["choices"][0]["message"]["content"]
+
+                        # Execute callbacks and display based on verbose setting
+                        if verbose and not interaction_displayed:
+                            # Display the complete response at once (this will trigger callbacks internally)
+                            display_interaction(
+                                original_prompt,
+                                response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - current_time,
+                                console=console,
+                                agent_name=agent_name,
+                                agent_role=agent_role,
+                                agent_tools=agent_tools,
+                                task_name=task_name,
+                                task_description=task_description,
+                                task_id=task_id
+                            )
+                            interaction_displayed = True
+                            callback_executed = True
+                        elif not callback_executed:
+                            # Only execute callback if display_interaction hasn't been called
+                            execute_sync_callback(
+                                'interaction',
+                                message=original_prompt,
+                                response=response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - current_time,
+                                agent_name=agent_name,
+                                agent_role=agent_role,
+                                agent_tools=agent_tools,
+                                task_name=task_name,
+                                task_description=task_description,
+                                task_id=task_id
+                            )
+                            callback_executed = True
+
+                        # Mark that fallback completed successfully
+                        fallback_completed = True
+                        streaming_success = False
+
+                    except Exception as fallback_error:
+                        # If non-streaming also fails, create a graceful fallback with partial streaming data
+                        logging.warning(f"Non-streaming fallback also failed: {fallback_error}")
+                        logging.warning("Using partial streaming response data")
+                        response_text = response_text or ""
+                        # Create a mock response with whatever partial data we have
+                        final_response = {
+                            "choices": [{
+                                "message": {
+                                    "content": response_text,
+                                    "tool_calls": tool_calls if tool_calls else None
+                                }
+                            }]
+                        }
+                        fallback_completed = True
+                        streaming_success = False
+                else:
+                    # For non-recoverable errors, re-raise immediately
+                    logging.error(f"Non-recoverable streaming error: {streaming_error}")
+                    raise streaming_error
+
+            if streaming_success:
+                response_text = response_text.strip() if response_text else ""
+
+                # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
+                if not verbose and not callback_executed:
+                    execute_sync_callback(
+                        'interaction',
+                        message=original_prompt,
+                        response=response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - current_time,
+                        agent_name=agent_name,
+                        agent_role=agent_role,
+                        agent_tools=agent_tools,
+                        task_name=task_name,
+                        task_description=task_description,
+                        task_id=task_id
+                    )
+                    callback_executed = True
+
+                # Create a mock final_response with the captured data
+                final_response = {
+                    "choices": [{
+                        "message": {
+                            "content": response_text,
+                            "tool_calls": tool_calls if tool_calls else None
+                        }
+                    }]
+                }
+
+        # Only execute non-streaming if we haven't used streaming AND fallback hasn't completed
+        if not use_streaming and not fallback_completed:
+            # Non-streaming approach (when tools require it, streaming is disabled, or streaming fallback)
+            if verbose:
+                # When verbose=True, always use streaming for better UX
+                with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+                    response_text = ""
+                    # Use streaming when verbose for progressive display
+                    for chunk in litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=True,  # Always stream when verbose=True
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta:
+                            delta = chunk.choices[0].delta
+                            if hasattr(delta, "content") and delta.content:
+                                response_text += delta.content
                                 live.update(display_generating(response_text, current_time))
-
+
+                # Clear the live display after completion
+                console.print()
+
+                # Create final response structure
+                final_response = {
+                    "choices": [{
+                        "message": {
+                            "content": response_text,
+                            "tool_calls": None
+                        }
+                    }]
+                }
             else:
-                #
-
+                # For non-streaming + non-verbose: no display_generating (per user requirements)
+                final_response = litellm.completion(
                     **self._build_completion_params(
                         messages=messages,
                         tools=formatted_tools,
                         temperature=temperature,
-                        stream=
+                        stream=False,
                         output_json=output_json,
                         output_pydantic=output_pydantic,
                         **kwargs
                     )
-                ):
-                    if chunk and chunk.choices and chunk.choices[0].delta:
-                        delta = chunk.choices[0].delta
-                        if delta.content:
-                            response_text += delta.content
-
-                        # Capture tool calls from streaming chunks if provider supports it
-                        if formatted_tools and self._supports_streaming_tools():
-                            tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
-
-            response_text = response_text.strip() if response_text else ""
-
-            # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
-            if not verbose and not callback_executed:
-                execute_sync_callback(
-                    'interaction',
-                    message=original_prompt,
-                    response=response_text,
-                    markdown=markdown,
-                    generation_time=time.time() - current_time,
-                    agent_name=agent_name,
-                    agent_role=agent_role,
-                    agent_tools=agent_tools,
-                    task_name=task_name,
-                    task_description=task_description,
-                    task_id=task_id
                 )
-
-
-
-            # Create a mock final_response with the captured data
-            final_response = {
-                "choices": [{
-                    "message": {
-                        "content": response_text,
-                        "tool_calls": tool_calls if tool_calls else None
-                    }
-                }]
-            }
-        else:
-            # Non-streaming approach (when tools require it or streaming is disabled)
-            final_response = litellm.completion(
-                **self._build_completion_params(
-                    messages=messages,
-                    tools=formatted_tools,
-                    temperature=temperature,
-                    stream=False,
-                    output_json=output_json,
-                    output_pydantic=output_pydantic,
-                    **kwargs
-                )
-            )
-            response_text = final_response["choices"][0]["message"]["content"]
+            response_text = final_response["choices"][0]["message"]["content"]
 
         # Execute callbacks and display based on verbose setting
         if verbose and not interaction_displayed:
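Taken together, the two hunks above implement a streaming-first call with a guarded retry: the streaming attempt is wrapped in a try block, recoverable failures (for example LiteLLM JSON-parsing errors on malformed chunks) trigger a single non-streaming fallback call, and the new fallback_completed flag keeps the later non-streaming branch from issuing a duplicate API request. The following is a minimal, self-contained sketch of that control flow, not code from the package: call_model stands in for litellm.completion and is_recoverable for the class's _is_streaming_error_recoverable helper.

    # Sketch only: call_model and is_recoverable are assumed stand-ins for
    # litellm.completion and LLM._is_streaming_error_recoverable.
    def fetch_response(call_model, is_recoverable, messages, use_streaming=True):
        response_text = ""
        fallback_completed = False   # guards the non-streaming branch below
        streaming_success = False

        if use_streaming:
            try:
                for chunk in call_model(messages, stream=True):
                    response_text += chunk        # accumulate streamed content
                streaming_success = True
            except Exception as err:
                if not is_recoverable(err):
                    raise                         # non-recoverable: propagate
                # Recoverable (e.g. malformed-chunk JSON error): retry once without streaming.
                response_text = call_model(messages, stream=False)
                fallback_completed = True

        if not use_streaming and not fallback_completed:
            # Plain non-streaming path; skipped when the fallback already ran.
            response_text = call_model(messages, stream=False)

        return response_text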
@@ -1557,11 +1714,280 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         total_time = time.time() - start_time
         logging.debug(f"get_response completed in {total_time:.2f} seconds")
 
+    def get_response_stream(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[Dict]] = None,
+        temperature: float = 0.2,
+        tools: Optional[List[Any]] = None,
+        output_json: Optional[BaseModel] = None,
+        output_pydantic: Optional[BaseModel] = None,
+        verbose: bool = False,  # Default to non-verbose for streaming
+        markdown: bool = True,
+        agent_name: Optional[str] = None,
+        agent_role: Optional[str] = None,
+        agent_tools: Optional[List[str]] = None,
+        task_name: Optional[str] = None,
+        task_description: Optional[str] = None,
+        task_id: Optional[str] = None,
+        execute_tool_fn: Optional[Callable] = None,
+        **kwargs
+    ):
+        """Generator that yields real-time response chunks from the LLM.
+
+        This method provides true streaming by yielding content chunks as they
+        are received from the underlying LLM, enabling real-time display of
+        responses without waiting for the complete response.
+
+        Args:
+            prompt: The prompt to send to the LLM
+            system_prompt: Optional system prompt
+            chat_history: Optional chat history
+            temperature: Sampling temperature
+            tools: Optional list of tools for function calling
+            output_json: Optional JSON schema for structured output
+            output_pydantic: Optional Pydantic model for structured output
+            verbose: Whether to enable verbose logging (default False for streaming)
+            markdown: Whether to enable markdown processing
+            agent_name: Optional agent name for logging
+            agent_role: Optional agent role for logging
+            agent_tools: Optional list of agent tools for logging
+            task_name: Optional task name for logging
+            task_description: Optional task description for logging
+            task_id: Optional task ID for logging
+            execute_tool_fn: Optional function for executing tools
+            **kwargs: Additional parameters
+
+        Yields:
+            str: Individual content chunks as they are received from the LLM
+
+        Raises:
+            Exception: If streaming fails or LLM call encounters an error
+        """
+        try:
+            import litellm
+
+            # Build messages using existing logic
+            messages, original_prompt = self._build_messages(
+                prompt=prompt,
+                system_prompt=system_prompt,
+                chat_history=chat_history,
+                output_json=output_json,
+                output_pydantic=output_pydantic
+            )
+
+            # Format tools for litellm
+            formatted_tools = self._format_tools_for_litellm(tools)
+
+            # Determine if we should use streaming based on tool support
+            use_streaming = True
+            if formatted_tools and not self._supports_streaming_tools():
+                # Provider doesn't support streaming with tools, fall back to non-streaming
+                use_streaming = False
+
+            if use_streaming:
+                # Real-time streaming approach with tool call support
+                try:
+                    tool_calls = []
+                    response_text = ""
+                    consecutive_errors = 0
+                    max_consecutive_errors = 3  # Fallback to non-streaming after 3 consecutive errors
+
+                    stream_iterator = litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    )
+
+                    # Wrap the iteration with additional error handling for LiteLLM JSON parsing errors
+                    try:
+                        for chunk in stream_iterator:
+                            try:
+                                if chunk and chunk.choices and chunk.choices[0].delta:
+                                    delta = chunk.choices[0].delta
+
+                                    # Process both content and tool calls using existing helper
+                                    response_text, tool_calls = self._process_stream_delta(
+                                        delta, response_text, tool_calls, formatted_tools
+                                    )
+
+                                    # Yield content chunks in real-time as they arrive
+                                    if delta.content:
+                                        yield delta.content
+
+                                    # Reset consecutive error counter only after successful chunk processing
+                                    consecutive_errors = 0
+
+                            except Exception as chunk_error:
+                                consecutive_errors += 1
+
+                                # Log the specific error for debugging
+                                if verbose:
+                                    logging.warning(f"Chunk processing error ({consecutive_errors}/{max_consecutive_errors}): {chunk_error}")
+
+                                # Check if this error is recoverable using our helper method
+                                if self._is_streaming_error_recoverable(chunk_error):
+                                    if verbose:
+                                        logging.warning("Recoverable streaming error detected, skipping malformed chunk and continuing")
+
+                                    # Skip this malformed chunk and continue if we haven't hit the limit
+                                    if consecutive_errors < max_consecutive_errors:
+                                        continue
+                                    else:
+                                        # Too many recoverable errors, fallback to non-streaming
+                                        logging.warning(f"Too many consecutive streaming errors ({consecutive_errors}), falling back to non-streaming mode")
+                                        raise Exception(f"Streaming failed with {consecutive_errors} consecutive errors") from chunk_error
+                                else:
+                                    # For non-recoverable errors, re-raise immediately
+                                    logging.error(f"Non-recoverable streaming error: {chunk_error}")
+                                    raise chunk_error
+
+                    except Exception as iterator_error:
+                        # Handle errors that occur during stream iteration itself (e.g., JSON parsing in LiteLLM)
+                        error_msg = str(iterator_error).lower()
+
+                        # Check if this is a recoverable streaming error (including JSON parsing errors)
+                        if self._is_streaming_error_recoverable(iterator_error):
+                            if verbose:
+                                logging.warning(f"Stream iterator error detected (recoverable): {iterator_error}")
+                                logging.warning("Falling back to non-streaming mode due to stream iteration failure")
+
+                            # Force fallback to non-streaming for iterator-level errors
+                            raise Exception("Stream iteration failed with recoverable error, falling back to non-streaming") from iterator_error
+                        else:
+                            # For non-recoverable errors, re-raise immediately
+                            logging.error(f"Non-recoverable stream iterator error: {iterator_error}")
+                            raise iterator_error
+
+                    # After streaming completes, handle tool calls if present
+                    if tool_calls and execute_tool_fn:
+                        # Add assistant message with tool calls to conversation
+                        if self._is_ollama_provider():
+                            messages.append({
+                                "role": "assistant",
+                                "content": response_text
+                            })
+                        else:
+                            serializable_tool_calls = self._serialize_tool_calls(tool_calls)
+                            messages.append({
+                                "role": "assistant",
+                                "content": response_text,
+                                "tool_calls": serializable_tool_calls
+                            })
+
+                        # Execute tool calls and add results to conversation
+                        for tool_call in tool_calls:
+                            is_ollama = self._is_ollama_provider()
+                            function_name, arguments, tool_call_id = self._extract_tool_call_info(tool_call, is_ollama)
+
+                            try:
+                                # Execute the tool
+                                tool_result = execute_tool_fn(function_name, arguments)
+
+                                # Add tool result to messages
+                                tool_message = self._create_tool_message(function_name, tool_result, tool_call_id, is_ollama)
+                                messages.append(tool_message)
+
+                            except Exception as e:
+                                logging.error(f"Tool execution error for {function_name}: {e}")
+                                # Add error message to conversation
+                                error_message = self._create_tool_message(
+                                    function_name, f"Error executing tool: {e}", tool_call_id, is_ollama
+                                )
+                                messages.append(error_message)
+
+                        # Continue conversation after tool execution - get follow-up response
+                        try:
+                            follow_up_response = litellm.completion(
+                                **self._build_completion_params(
+                                    messages=messages,
+                                    tools=formatted_tools,
+                                    temperature=temperature,
+                                    stream=False,
+                                    **kwargs
+                                )
+                            )
+
+                            if follow_up_response and follow_up_response.choices:
+                                follow_up_content = follow_up_response.choices[0].message.content
+                                if follow_up_content:
+                                    # Yield the follow-up response after tool execution
+                                    yield follow_up_content
+                        except Exception as e:
+                            logging.error(f"Follow-up response failed: {e}")
+
+                except Exception as e:
+                    error_msg = str(e).lower()
+
+                    # Provide more specific error messages based on the error type
+                    if any(keyword in error_msg for keyword in ['json', 'expecting property name', 'parse', 'decode']):
+                        logging.warning(f"Streaming failed due to JSON parsing errors (likely malformed chunks from provider): {e}")
+                    elif 'connection' in error_msg or 'timeout' in error_msg:
+                        logging.warning(f"Streaming failed due to connection issues: {e}")
+                    else:
+                        logging.error(f"Streaming failed with unexpected error: {e}")
+
+                    # Fall back to non-streaming if streaming fails
+                    use_streaming = False
+
+            if not use_streaming:
+                # Fall back to non-streaming and yield the complete response
+                try:
+                    response = litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=False,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    )
+
+                    if response and response.choices:
+                        content = response.choices[0].message.content
+                        if content:
+                            # Yield the complete response as a single chunk
+                            yield content
+
+                except Exception as e:
+                    logging.error(f"Non-streaming fallback failed: {e}")
+                    raise
+
+        except Exception as e:
+            logging.error(f"Error in get_response_stream: {e}")
+            raise
+
     def _is_gemini_model(self) -> bool:
         """Check if the model is a Gemini model."""
         if not self.model:
             return False
         return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
+    def _is_streaming_error_recoverable(self, error: Exception) -> bool:
+        """Check if a streaming error is recoverable (e.g., malformed chunk vs connection error)."""
+        error_msg = str(error).lower()
+
+        # JSON parsing errors are often recoverable (skip malformed chunk and continue)
+        json_error_keywords = ['json', 'expecting property name', 'parse', 'decode', 'invalid json']
+        if any(keyword in error_msg for keyword in json_error_keywords):
+            return True
+
+        # Connection errors might be temporary but are less recoverable in streaming context
+        connection_error_keywords = ['connection', 'timeout', 'network', 'http']
+        if any(keyword in error_msg for keyword in connection_error_keywords):
+            return False
+
+        # Other errors are generally not recoverable
+        return False
 
     async def get_response_async(
         self,
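For orientation, the new get_response_stream method is a generator, so a caller consumes it by iterating over the yielded chunks. A minimal usage sketch follows; the import path matches the module shown in this diff, but the constructor arguments are assumptions for illustration and are not taken from the diff.

    # Hypothetical usage of the get_response_stream generator added in 0.0.146.
    # Only the method parameters come from the diff; the constructor call is assumed.
    from praisonaiagents.llm.llm import LLM

    llm = LLM(model="gpt-4o-mini")  # assumed constructor usage

    for chunk in llm.get_response_stream(
        prompt="Summarise this release in one sentence.",
        temperature=0.2,
        verbose=False,
    ):
        print(chunk, end="", flush=True)  # chunks arrive as the provider streams them

    print()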