praisonaiagents 0.0.144__py3-none-any.whl → 0.0.146__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -971,13 +971,39 @@ class LLM:
  # Provider doesn't support streaming with tools, use non-streaming
  use_streaming = False
 
+ # Track whether fallback was successful to avoid duplicate API calls
+ fallback_completed = False
+
  if use_streaming:
  # Streaming approach (with or without tools)
  tool_calls = []
  response_text = ""
+ streaming_success = False
 
- if verbose:
- with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+ # Wrap streaming with error handling for LiteLLM JSON parsing errors
+ try:
+ if verbose:
+ # Verbose streaming: show display_generating during streaming
+ with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+ for chunk in litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+ response_text, tool_calls = self._process_stream_delta(
+ delta, response_text, tool_calls, formatted_tools
+ )
+ live.update(display_generating(response_text, current_time))
+ else:
+ # Non-verbose streaming: no display_generating during streaming
  for chunk in litellm.completion(
  **self._build_completion_params(
  messages=messages,
@@ -994,74 +1020,205 @@ class LLM:
  response_text, tool_calls = self._process_stream_delta(
  delta, response_text, tool_calls, formatted_tools
  )
- if delta.content:
+ streaming_success = True
+ except Exception as streaming_error:
+ # Handle streaming errors with recovery logic
+ if self._is_streaming_error_recoverable(streaming_error):
+ if verbose:
+ logging.warning(f"Streaming error (recoverable): {streaming_error}")
+ logging.warning("Falling back to non-streaming mode")
+ # Immediately perform non-streaming fallback with actual API call
+ try:
+ if verbose:
+ # When verbose=True, always use streaming for better UX
+ with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+ response_text = ""
+ # Use streaming when verbose for progressive display
+ for chunk in litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=True, # Always stream when verbose=True
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+ if hasattr(delta, "content") and delta.content:
+ response_text += delta.content
+ live.update(display_generating(response_text, current_time))
+
+ # Clear the live display after completion
+ console.print()
+
+ # Create final response structure
+ final_response = {
+ "choices": [{
+ "message": {
+ "content": response_text,
+ "tool_calls": None
+ }
+ }]
+ }
+ else:
+ # For non-streaming + non-verbose: no display_generating (per user requirements)
+ final_response = litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=False,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ )
+ response_text = final_response["choices"][0]["message"]["content"]
+
+ # Execute callbacks and display based on verbose setting
+ if verbose and not interaction_displayed:
+ # Display the complete response at once (this will trigger callbacks internally)
+ display_interaction(
+ original_prompt,
+ response_text,
+ markdown=markdown,
+ generation_time=time.time() - current_time,
+ console=console,
+ agent_name=agent_name,
+ agent_role=agent_role,
+ agent_tools=agent_tools,
+ task_name=task_name,
+ task_description=task_description,
+ task_id=task_id
+ )
+ interaction_displayed = True
+ callback_executed = True
+ elif not callback_executed:
+ # Only execute callback if display_interaction hasn't been called
+ execute_sync_callback(
+ 'interaction',
+ message=original_prompt,
+ response=response_text,
+ markdown=markdown,
+ generation_time=time.time() - current_time,
+ agent_name=agent_name,
+ agent_role=agent_role,
+ agent_tools=agent_tools,
+ task_name=task_name,
+ task_description=task_description,
+ task_id=task_id
+ )
+ callback_executed = True
+
+ # Mark that fallback completed successfully
+ fallback_completed = True
+ streaming_success = False
+
+ except Exception as fallback_error:
+ # If non-streaming also fails, create a graceful fallback with partial streaming data
+ logging.warning(f"Non-streaming fallback also failed: {fallback_error}")
+ logging.warning("Using partial streaming response data")
+ response_text = response_text or ""
+ # Create a mock response with whatever partial data we have
+ final_response = {
+ "choices": [{
+ "message": {
+ "content": response_text,
+ "tool_calls": tool_calls if tool_calls else None
+ }
+ }]
+ }
+ fallback_completed = True
+ streaming_success = False
+ else:
+ # For non-recoverable errors, re-raise immediately
+ logging.error(f"Non-recoverable streaming error: {streaming_error}")
+ raise streaming_error
+
+ if streaming_success:
+ response_text = response_text.strip() if response_text else ""
+
+ # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
+ if not verbose and not callback_executed:
+ execute_sync_callback(
+ 'interaction',
+ message=original_prompt,
+ response=response_text,
+ markdown=markdown,
+ generation_time=time.time() - current_time,
+ agent_name=agent_name,
+ agent_role=agent_role,
+ agent_tools=agent_tools,
+ task_name=task_name,
+ task_description=task_description,
+ task_id=task_id
+ )
+ callback_executed = True
+
+ # Create a mock final_response with the captured data
+ final_response = {
+ "choices": [{
+ "message": {
+ "content": response_text,
+ "tool_calls": tool_calls if tool_calls else None
+ }
+ }]
+ }
+
+ # Only execute non-streaming if we haven't used streaming AND fallback hasn't completed
+ if not use_streaming and not fallback_completed:
+ # Non-streaming approach (when tools require it, streaming is disabled, or streaming fallback)
+ if verbose:
+ # When verbose=True, always use streaming for better UX
+ with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+ response_text = ""
+ # Use streaming when verbose for progressive display
+ for chunk in litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=True, # Always stream when verbose=True
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+ if hasattr(delta, "content") and delta.content:
+ response_text += delta.content
  live.update(display_generating(response_text, current_time))
-
+
+ # Clear the live display after completion
+ console.print()
+
+ # Create final response structure
+ final_response = {
+ "choices": [{
+ "message": {
+ "content": response_text,
+ "tool_calls": None
+ }
+ }]
+ }
  else:
- # Non-verbose streaming
- for chunk in litellm.completion(
+ # For non-streaming + non-verbose: no display_generating (per user requirements)
+ final_response = litellm.completion(
  **self._build_completion_params(
  messages=messages,
  tools=formatted_tools,
  temperature=temperature,
- stream=True,
+ stream=False,
  output_json=output_json,
  output_pydantic=output_pydantic,
  **kwargs
  )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta:
- delta = chunk.choices[0].delta
- if delta.content:
- response_text += delta.content
-
- # Capture tool calls from streaming chunks if provider supports it
- if formatted_tools and self._supports_streaming_tools():
- tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
-
- response_text = response_text.strip() if response_text else ""
-
- # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
- if not verbose and not callback_executed:
- execute_sync_callback(
- 'interaction',
- message=original_prompt,
- response=response_text,
- markdown=markdown,
- generation_time=time.time() - current_time,
- agent_name=agent_name,
- agent_role=agent_role,
- agent_tools=agent_tools,
- task_name=task_name,
- task_description=task_description,
- task_id=task_id
  )
- callback_executed = True
-
-
- # Create a mock final_response with the captured data
- final_response = {
- "choices": [{
- "message": {
- "content": response_text,
- "tool_calls": tool_calls if tool_calls else None
- }
- }]
- }
- else:
- # Non-streaming approach (when tools require it or streaming is disabled)
- final_response = litellm.completion(
- **self._build_completion_params(
- messages=messages,
- tools=formatted_tools,
- temperature=temperature,
- stream=False,
- output_json=output_json,
- output_pydantic=output_pydantic,
- **kwargs
- )
- )
- response_text = final_response["choices"][0]["message"]["content"]
+ response_text = final_response["choices"][0]["message"]["content"]
 
  # Execute callbacks and display based on verbose setting
  if verbose and not interaction_displayed:
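
The two hunks above change the streaming path of LLM.get_response: the stream is now wrapped in a try/except, a recoverable failure (typically a malformed chunk that trips LiteLLM's JSON parsing) triggers an immediate non-streaming retry, and the new fallback_completed flag stops the later non-streaming branch from issuing a duplicate API call. The following is a minimal sketch of that pattern, not the package's code; call_llm and is_recoverable are hypothetical stand-ins for litellm.completion and _is_streaming_error_recoverable.

    import logging

    def complete_with_fallback(call_llm, is_recoverable, use_streaming=True):
        """Sketch only: call_llm(stream) and is_recoverable(exc) are hypothetical helpers."""
        fallback_completed = False
        response_text = ""

        if use_streaming:
            try:
                response_text = call_llm(stream=True)    # streaming attempt
            except Exception as exc:
                if not is_recoverable(exc):
                    raise                                 # non-recoverable errors propagate
                logging.warning("Recoverable streaming error, retrying without streaming: %s", exc)
                response_text = call_llm(stream=False)    # immediate non-streaming retry
                fallback_completed = True                 # remember the API was already called

        if not use_streaming and not fallback_completed:
            # Same guard as the diff: never issue a second request after a completed fallback.
            response_text = call_llm(stream=False)

        return response_text

In the actual method the fallback branch also replays the display and callback logic, which is why the hunk is much longer than this outline.
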
@@ -1557,11 +1714,280 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  total_time = time.time() - start_time
  logging.debug(f"get_response completed in {total_time:.2f} seconds")
 
+ def get_response_stream(
+ self,
+ prompt: Union[str, List[Dict]],
+ system_prompt: Optional[str] = None,
+ chat_history: Optional[List[Dict]] = None,
+ temperature: float = 0.2,
+ tools: Optional[List[Any]] = None,
+ output_json: Optional[BaseModel] = None,
+ output_pydantic: Optional[BaseModel] = None,
+ verbose: bool = False, # Default to non-verbose for streaming
+ markdown: bool = True,
+ agent_name: Optional[str] = None,
+ agent_role: Optional[str] = None,
+ agent_tools: Optional[List[str]] = None,
+ task_name: Optional[str] = None,
+ task_description: Optional[str] = None,
+ task_id: Optional[str] = None,
+ execute_tool_fn: Optional[Callable] = None,
+ **kwargs
+ ):
+ """Generator that yields real-time response chunks from the LLM.
+
+ This method provides true streaming by yielding content chunks as they
+ are received from the underlying LLM, enabling real-time display of
+ responses without waiting for the complete response.
+
+ Args:
+ prompt: The prompt to send to the LLM
+ system_prompt: Optional system prompt
+ chat_history: Optional chat history
+ temperature: Sampling temperature
+ tools: Optional list of tools for function calling
+ output_json: Optional JSON schema for structured output
+ output_pydantic: Optional Pydantic model for structured output
+ verbose: Whether to enable verbose logging (default False for streaming)
+ markdown: Whether to enable markdown processing
+ agent_name: Optional agent name for logging
+ agent_role: Optional agent role for logging
+ agent_tools: Optional list of agent tools for logging
+ task_name: Optional task name for logging
+ task_description: Optional task description for logging
+ task_id: Optional task ID for logging
+ execute_tool_fn: Optional function for executing tools
+ **kwargs: Additional parameters
+
+ Yields:
+ str: Individual content chunks as they are received from the LLM
+
+ Raises:
+ Exception: If streaming fails or LLM call encounters an error
+ """
+ try:
+ import litellm
+
+ # Build messages using existing logic
+ messages, original_prompt = self._build_messages(
+ prompt=prompt,
+ system_prompt=system_prompt,
+ chat_history=chat_history,
+ output_json=output_json,
+ output_pydantic=output_pydantic
+ )
+
+ # Format tools for litellm
+ formatted_tools = self._format_tools_for_litellm(tools)
+
+ # Determine if we should use streaming based on tool support
+ use_streaming = True
+ if formatted_tools and not self._supports_streaming_tools():
+ # Provider doesn't support streaming with tools, fall back to non-streaming
+ use_streaming = False
+
+ if use_streaming:
+ # Real-time streaming approach with tool call support
+ try:
+ tool_calls = []
+ response_text = ""
+ consecutive_errors = 0
+ max_consecutive_errors = 3 # Fallback to non-streaming after 3 consecutive errors
+
+ stream_iterator = litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ )
+
+ # Wrap the iteration with additional error handling for LiteLLM JSON parsing errors
+ try:
+ for chunk in stream_iterator:
+ try:
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+
+ # Process both content and tool calls using existing helper
+ response_text, tool_calls = self._process_stream_delta(
+ delta, response_text, tool_calls, formatted_tools
+ )
+
+ # Yield content chunks in real-time as they arrive
+ if delta.content:
+ yield delta.content
+
+ # Reset consecutive error counter only after successful chunk processing
+ consecutive_errors = 0
+
+ except Exception as chunk_error:
+ consecutive_errors += 1
+
+ # Log the specific error for debugging
+ if verbose:
+ logging.warning(f"Chunk processing error ({consecutive_errors}/{max_consecutive_errors}): {chunk_error}")
+
+ # Check if this error is recoverable using our helper method
+ if self._is_streaming_error_recoverable(chunk_error):
+ if verbose:
+ logging.warning("Recoverable streaming error detected, skipping malformed chunk and continuing")
+
+ # Skip this malformed chunk and continue if we haven't hit the limit
+ if consecutive_errors < max_consecutive_errors:
+ continue
+ else:
+ # Too many recoverable errors, fallback to non-streaming
+ logging.warning(f"Too many consecutive streaming errors ({consecutive_errors}), falling back to non-streaming mode")
+ raise Exception(f"Streaming failed with {consecutive_errors} consecutive errors") from chunk_error
+ else:
+ # For non-recoverable errors, re-raise immediately
+ logging.error(f"Non-recoverable streaming error: {chunk_error}")
+ raise chunk_error
+
+ except Exception as iterator_error:
+ # Handle errors that occur during stream iteration itself (e.g., JSON parsing in LiteLLM)
+ error_msg = str(iterator_error).lower()
+
+ # Check if this is a recoverable streaming error (including JSON parsing errors)
+ if self._is_streaming_error_recoverable(iterator_error):
+ if verbose:
+ logging.warning(f"Stream iterator error detected (recoverable): {iterator_error}")
+ logging.warning("Falling back to non-streaming mode due to stream iteration failure")
+
+ # Force fallback to non-streaming for iterator-level errors
+ raise Exception("Stream iteration failed with recoverable error, falling back to non-streaming") from iterator_error
+ else:
+ # For non-recoverable errors, re-raise immediately
+ logging.error(f"Non-recoverable stream iterator error: {iterator_error}")
+ raise iterator_error
+
+ # After streaming completes, handle tool calls if present
+ if tool_calls and execute_tool_fn:
+ # Add assistant message with tool calls to conversation
+ if self._is_ollama_provider():
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+ else:
+ serializable_tool_calls = self._serialize_tool_calls(tool_calls)
+ messages.append({
+ "role": "assistant",
+ "content": response_text,
+ "tool_calls": serializable_tool_calls
+ })
+
+ # Execute tool calls and add results to conversation
+ for tool_call in tool_calls:
+ is_ollama = self._is_ollama_provider()
+ function_name, arguments, tool_call_id = self._extract_tool_call_info(tool_call, is_ollama)
+
+ try:
+ # Execute the tool
+ tool_result = execute_tool_fn(function_name, arguments)
+
+ # Add tool result to messages
+ tool_message = self._create_tool_message(function_name, tool_result, tool_call_id, is_ollama)
+ messages.append(tool_message)
+
+ except Exception as e:
+ logging.error(f"Tool execution error for {function_name}: {e}")
+ # Add error message to conversation
+ error_message = self._create_tool_message(
+ function_name, f"Error executing tool: {e}", tool_call_id, is_ollama
+ )
+ messages.append(error_message)
+
+ # Continue conversation after tool execution - get follow-up response
+ try:
+ follow_up_response = litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=False,
+ **kwargs
+ )
+ )
+
+ if follow_up_response and follow_up_response.choices:
+ follow_up_content = follow_up_response.choices[0].message.content
+ if follow_up_content:
+ # Yield the follow-up response after tool execution
+ yield follow_up_content
+ except Exception as e:
+ logging.error(f"Follow-up response failed: {e}")
+
+ except Exception as e:
+ error_msg = str(e).lower()
+
+ # Provide more specific error messages based on the error type
+ if any(keyword in error_msg for keyword in ['json', 'expecting property name', 'parse', 'decode']):
+ logging.warning(f"Streaming failed due to JSON parsing errors (likely malformed chunks from provider): {e}")
+ elif 'connection' in error_msg or 'timeout' in error_msg:
+ logging.warning(f"Streaming failed due to connection issues: {e}")
+ else:
+ logging.error(f"Streaming failed with unexpected error: {e}")
+
+ # Fall back to non-streaming if streaming fails
+ use_streaming = False
+
+ if not use_streaming:
+ # Fall back to non-streaming and yield the complete response
+ try:
+ response = litellm.completion(
+ **self._build_completion_params(
+ messages=messages,
+ tools=formatted_tools,
+ temperature=temperature,
+ stream=False,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **kwargs
+ )
+ )
+
+ if response and response.choices:
+ content = response.choices[0].message.content
+ if content:
+ # Yield the complete response as a single chunk
+ yield content
+
+ except Exception as e:
+ logging.error(f"Non-streaming fallback failed: {e}")
+ raise
+
+ except Exception as e:
+ logging.error(f"Error in get_response_stream: {e}")
+ raise
+
  def _is_gemini_model(self) -> bool:
  """Check if the model is a Gemini model."""
  if not self.model:
  return False
  return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
+ def _is_streaming_error_recoverable(self, error: Exception) -> bool:
+ """Check if a streaming error is recoverable (e.g., malformed chunk vs connection error)."""
+ error_msg = str(error).lower()
+
+ # JSON parsing errors are often recoverable (skip malformed chunk and continue)
+ json_error_keywords = ['json', 'expecting property name', 'parse', 'decode', 'invalid json']
+ if any(keyword in error_msg for keyword in json_error_keywords):
+ return True
+
+ # Connection errors might be temporary but are less recoverable in streaming context
+ connection_error_keywords = ['connection', 'timeout', 'network', 'http']
+ if any(keyword in error_msg for keyword in connection_error_keywords):
+ return False
+
+ # Other errors are generally not recoverable
+ return False
 
  async def get_response_async(
  self,
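
The third hunk adds get_response_stream, a synchronous generator that yields content chunks as they arrive, skips past malformed chunks (up to three consecutive recoverable errors, as classified by the new _is_streaming_error_recoverable helper), and falls back to a single non-streaming completion when streaming cannot be used. A hedged usage sketch follows; the import path and constructor argument are assumptions about how the package exposes the LLM class, and the model name is only an example.

    from praisonaiagents.llm import LLM  # assumed import path for the LLM class

    llm = LLM(model="gpt-4o-mini")  # illustrative model name, not a requirement

    # get_response_stream is a generator: chunks are yielded as the provider
    # streams them; if the call falls back to non-streaming, the full response
    # arrives as a single chunk instead.
    for chunk in llm.get_response_stream(
        prompt="Explain streaming LLM responses in two sentences.",
        temperature=0.2,
        verbose=False,
    ):
        print(chunk, end="", flush=True)
    print()
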