praisonaiagents 0.0.145__py3-none-any.whl → 0.0.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,27 @@ Praison AI Agents - A package for hierarchical AI agent task execution
  # Configure logging before any other imports
  import os
  import logging
+ import warnings
+ import re
  from rich.logging import RichHandler

+ # Set environment variables to suppress warnings at the source
+ os.environ["LITELLM_TELEMETRY"] = "False"
+ os.environ["LITELLM_DROP_PARAMS"] = "True"
+ # Disable httpx warnings
+ os.environ["HTTPX_DISABLE_WARNINGS"] = "True"
+
  # Get log level from environment variable
  LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()

+ # Determine if warnings should be suppressed (not in DEBUG mode and not in tests)
+ def _should_suppress_warnings():
+ import sys
+ return (LOGLEVEL != 'DEBUG' and
+ not hasattr(sys, '_called_from_test') and
+ 'pytest' not in sys.modules and
+ os.environ.get('PYTEST_CURRENT_TEST') is None)
+
  # Configure root logger
  logging.basicConfig(
  level=getattr(logging, LOGLEVEL, logging.INFO),
@@ -18,13 +34,34 @@ logging.basicConfig(
  handlers=[RichHandler(rich_tracebacks=True)]
  )

- # Suppress specific noisy loggers
- logging.getLogger("litellm").setLevel(logging.WARNING)
- logging.getLogger("litellm.utils").setLevel(logging.WARNING)
- logging.getLogger("markdown_it").setLevel(logging.WARNING)
- logging.getLogger("rich.markdown").setLevel(logging.WARNING)
- logging.getLogger("httpx").setLevel(logging.WARNING)
- logging.getLogger("httpcore").setLevel(logging.WARNING)
+ # Suppress specific noisy loggers - more aggressive suppression (only when not in DEBUG mode)
+ if _should_suppress_warnings():
+ logging.getLogger("litellm").setLevel(logging.CRITICAL)
+ logging.getLogger("litellm_logging").setLevel(logging.CRITICAL)
+ logging.getLogger("httpx").setLevel(logging.CRITICAL)
+ logging.getLogger("httpcore").setLevel(logging.CRITICAL)
+ logging.getLogger("pydantic").setLevel(logging.WARNING)
+ logging.getLogger("markdown_it").setLevel(logging.WARNING)
+ logging.getLogger("rich.markdown").setLevel(logging.WARNING)
+
+ # Note: litellm child loggers (litellm.utils, litellm.proxy, etc.) automatically inherit
+ # the CRITICAL level from the parent litellm logger due to Python's hierarchical logging
+
+ # Comprehensive warning suppression for litellm and dependencies (issue #1033)
+ # These warnings clutter output and are not actionable for users
+
+ # Set warning filter to suppress all warnings from problematic modules at import time
+ if _should_suppress_warnings():
+ # Module-specific warning suppression - applied before imports (only when not in DEBUG mode)
+ for module in ['litellm', 'httpx', 'httpcore', 'pydantic']:
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module=module)
+ warnings.filterwarnings("ignore", category=UserWarning, module=module)
+
+ # Specific filters for known problematic warnings
+ warnings.filterwarnings("ignore", message="There is no current event loop")
+ warnings.filterwarnings("ignore", message=".*Use 'content=<...>' to upload raw bytes/text content.*")
+ warnings.filterwarnings("ignore", message=".*The `dict` method is deprecated; use `model_dump` instead.*")
+ warnings.filterwarnings("ignore", message=".*model_dump.*deprecated.*")

  from .agent.agent import Agent
  from .agent.image_agent import ImageAgent
@@ -94,6 +131,30 @@ except ImportError:
  # Add Agents as an alias for PraisonAIAgents
  Agents = PraisonAIAgents

+ # Additional warning suppression after all imports (runtime suppression)
+ if _should_suppress_warnings():
+ # Try to import and configure litellm to suppress its warnings
+ try:
+ import litellm
+ # Disable all litellm logging and telemetry
+ litellm.telemetry = False
+ litellm.drop_params = True
+ # Set litellm to suppress warnings
+ litellm.suppress_debug_info = True
+ if hasattr(litellm, '_logging_obj'):
+ litellm._logging_obj.setLevel(logging.CRITICAL)
+ except (ImportError, AttributeError):
+ pass
+
+ # Suppress pydantic warnings that might occur at runtime (safer approach)
+ try:
+ warnings.filterwarnings("ignore", category=UserWarning, module="pydantic",
+ message=".*model_dump.*deprecated.*")
+ warnings.filterwarnings("ignore", category=UserWarning, module="pydantic",
+ message=".*dict.*method.*deprecated.*")
+ except Exception:
+ pass
+
  # Apply telemetry auto-instrumentation after all imports
  if _telemetry_available:
  try:
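
The hunks above gate all third-party log and warning suppression behind `_should_suppress_warnings()`, which returns False under `LOGLEVEL=DEBUG` or pytest. A minimal sketch of how a user could keep the litellm/httpx logs visible, assuming the variable is set before the package is imported (the gate is evaluated at import time):

```python
import os

# Hedged opt-out sketch: DEBUG disables the CRITICAL-level suppression shown above.
os.environ["LOGLEVEL"] = "DEBUG"

import praisonaiagents  # noqa: E402  # third-party loggers keep their default levels
```
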
@@ -4,7 +4,7 @@ import json
  import copy
  import logging
  import asyncio
- from typing import List, Optional, Any, Dict, Union, Literal, TYPE_CHECKING, Callable, Tuple
+ from typing import List, Optional, Any, Dict, Union, Literal, TYPE_CHECKING, Callable, Tuple, Generator
  from rich.console import Console
  from rich.live import Live
  from ..llm import (
@@ -206,7 +206,7 @@ class Agent:
  knowledge_config: Optional[Dict[str, Any]] = None,
  use_system_prompt: Optional[bool] = True,
  markdown: bool = True,
- stream: bool = True,
+ stream: bool = False,
  self_reflect: bool = False,
  max_reflect: int = 3,
  min_reflect: int = 1,
@@ -281,8 +281,8 @@ class Agent:
  conversations to establish agent behavior and context. Defaults to True.
  markdown (bool, optional): Enable markdown formatting in agent responses for better
  readability and structure. Defaults to True.
- stream (bool, optional): Enable streaming responses from the language model. Set to False
- for LLM providers that don't support streaming. Defaults to True.
+ stream (bool, optional): Enable streaming responses from the language model for real-time
+ output when using Agent.start() method. Defaults to False for backward compatibility.
  self_reflect (bool, optional): Enable self-reflection capabilities where the agent
  evaluates and improves its own responses. Defaults to False.
  max_reflect (int, optional): Maximum number of self-reflection iterations to prevent
@@ -1070,7 +1070,7 @@ Your Goal: {self.goal}"""
  tools=formatted_tools,
  start_time=start_time,
  console=self.console,
- display_fn=display_generating,
+ display_fn=self.display_generating if self.verbose else None,
  reasoning_steps=reasoning_steps
  )

@@ -1109,25 +1109,51 @@ Your Goal: {self.goal}"""
  reasoning_steps=reasoning_steps
  )
  else:
- # Non-streaming with custom LLM
- final_response = self.llm_instance.get_response(
- prompt=messages[1:],
- system_prompt=messages[0]['content'] if messages and messages[0]['role'] == 'system' else None,
- temperature=temperature,
- tools=formatted_tools if formatted_tools else None,
- verbose=self.verbose,
- markdown=self.markdown,
- stream=stream,
- console=self.console,
- execute_tool_fn=self.execute_tool,
- agent_name=self.name,
- agent_role=self.role,
- agent_tools=[t.__name__ for t in self.tools] if self.tools else None,
- task_name=task_name,
- task_description=task_description,
- task_id=task_id,
- reasoning_steps=reasoning_steps
- )
+ # Non-streaming with custom LLM - don't show streaming-like behavior
+ if False: # Don't use display_generating when stream=False to avoid streaming-like behavior
+ # This block is disabled to maintain consistency with the OpenAI path fix
+ with Live(
+ display_generating("", start_time),
+ console=self.console,
+ refresh_per_second=4,
+ ) as live:
+ final_response = self.llm_instance.get_response(
+ prompt=messages[1:],
+ system_prompt=messages[0]['content'] if messages and messages[0]['role'] == 'system' else None,
+ temperature=temperature,
+ tools=formatted_tools if formatted_tools else None,
+ verbose=self.verbose,
+ markdown=self.markdown,
+ stream=stream,
+ console=self.console,
+ execute_tool_fn=self.execute_tool,
+ agent_name=self.name,
+ agent_role=self.role,
+ agent_tools=[t.__name__ for t in self.tools] if self.tools else None,
+ task_name=task_name,
+ task_description=task_description,
+ task_id=task_id,
+ reasoning_steps=reasoning_steps
+ )
+ else:
+ final_response = self.llm_instance.get_response(
+ prompt=messages[1:],
+ system_prompt=messages[0]['content'] if messages and messages[0]['role'] == 'system' else None,
+ temperature=temperature,
+ tools=formatted_tools if formatted_tools else None,
+ verbose=self.verbose,
+ markdown=self.markdown,
+ stream=stream,
+ console=self.console,
+ execute_tool_fn=self.execute_tool,
+ agent_name=self.name,
+ agent_role=self.role,
+ agent_tools=[t.__name__ for t in self.tools] if self.tools else None,
+ task_name=task_name,
+ task_description=task_description,
+ task_id=task_id,
+ reasoning_steps=reasoning_steps
+ )
  else:
  # Use the standard OpenAI client approach with tool support
  # Note: openai_client expects tools in various formats and will format them internally
@@ -1143,7 +1169,7 @@ Your Goal: {self.goal}"""
  execute_tool_fn=self.execute_tool,
  stream=stream,
  console=self.console if (self.verbose or stream) else None,
- display_fn=display_generating if stream else None,
+ display_fn=self.display_generating if self.verbose else None,
  reasoning_steps=reasoning_steps,
  verbose=self.verbose,
  max_iterations=10
@@ -1187,8 +1213,32 @@ Your Goal: {self.goal}"""
  task_description=None, # Not available in this context
  task_id=None) # Not available in this context
  self._final_display_shown = True
+
+ def display_generating(self, content: str, start_time: float):
+ """Display function for generating animation with agent info."""
+ from rich.panel import Panel
+ from rich.markdown import Markdown
+ elapsed = time.time() - start_time
+
+ # Show content if provided (for both streaming and progressive display)
+ if content:
+ display_content = Markdown(content) if self.markdown else content
+ return Panel(
+ display_content,
+ title=f"[bold]{self.name}[/bold] - Generating... {elapsed:.1f}s",
+ border_style="green",
+ expand=False
+ )
+ else:
+ # No content yet: show generating message
+ return Panel(
+ f"[bold cyan]Generating response...[/bold cyan]",
+ title=f"[bold]{self.name}[/bold] - {elapsed:.1f}s",
+ border_style="cyan",
+ expand=False
+ )

- def chat(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, stream=True, task_name=None, task_description=None, task_id=None):
+ def chat(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, stream=None, task_name=None, task_description=None, task_id=None):
  # Reset the final display flag for each new conversation
  self._final_display_shown = False

@@ -1209,6 +1259,9 @@ Your Goal: {self.goal}"""

  start_time = time.time()
  reasoning_steps = reasoning_steps or self.reasoning_steps
+ # Use agent's stream setting if not explicitly provided
+ if stream is None:
+ stream = self.stream
  # Search for existing knowledge if any knowledge is provided
  if self.knowledge:
  search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
@@ -1359,7 +1412,7 @@ Your Goal: {self.goal}"""
  agent_tools=agent_tools
  )

- response = self._chat_completion(messages, temperature=temperature, tools=tools if tools else None, reasoning_steps=reasoning_steps, stream=self.stream, task_name=task_name, task_description=task_description, task_id=task_id)
+ response = self._chat_completion(messages, temperature=temperature, tools=tools if tools else None, reasoning_steps=reasoning_steps, stream=stream, task_name=task_name, task_description=task_description, task_id=task_id)
  if not response:
  # Rollback chat history on response failure
  self.chat_history = self.chat_history[:chat_history_length]
@@ -1848,7 +1901,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  for tool_call in message.tool_calls:
  try:
  function_name = tool_call.function.name
- arguments = json.loads(tool_call.function.arguments)
+ # Parse JSON arguments safely
+ try:
+ arguments = json.loads(tool_call.function.arguments)
+ except json.JSONDecodeError as json_error:
+ logging.error(f"Failed to parse tool arguments as JSON: {json_error}")
+ arguments = {}

  # Find the matching tool
  tool = next((t for t in tools if t.__name__ == function_name), None)
@@ -1890,25 +1948,16 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  chunks = []
  start_time = time.time()

- with Live(
- display_generating("", start_time),
- console=self.console,
- refresh_per_second=4,
- transient=True,
- vertical_overflow="ellipsis",
- auto_refresh=True
- ) as live:
- async for chunk in final_response:
- chunks.append(chunk)
- if chunk.choices[0].delta.content:
- full_response_text += chunk.choices[0].delta.content
- live.update(display_generating(full_response_text, start_time))
-
- if reasoning_steps and hasattr(chunk.choices[0].delta, "reasoning_content"):
- rc = chunk.choices[0].delta.reasoning_content
- if rc:
- reasoning_content += rc
- live.update(display_generating(f"{full_response_text}\n[Reasoning: {reasoning_content}]", start_time))
+ # Process stream without display_generating since streaming is active
+ async for chunk in final_response:
+ chunks.append(chunk)
+ if chunk.choices[0].delta.content:
+ full_response_text += chunk.choices[0].delta.content
+
+ if reasoning_steps and hasattr(chunk.choices[0].delta, "reasoning_content"):
+ rc = chunk.choices[0].delta.reasoning_content
+ if rc:
+ reasoning_content += rc

  self.console.print()

@@ -1937,7 +1986,268 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.

  def start(self, prompt: str, **kwargs):
  """Start the agent with a prompt. This is a convenience method that wraps chat()."""
- return self.chat(prompt, **kwargs)
+ # Check if streaming is enabled (either from kwargs or agent's stream attribute)
+ stream_enabled = kwargs.get('stream', getattr(self, 'stream', False))
+
+ if stream_enabled:
+ # Return a generator for streaming response
+ return self._start_stream(prompt, **kwargs)
+ else:
+ # Return regular chat response for backward compatibility
+ # Explicitly pass the resolved stream parameter to avoid chat() method default
+ kwargs['stream'] = stream_enabled
+ return self.chat(prompt, **kwargs)
+
+ def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
+ """Stream generator for real-time response chunks."""
+ try:
+ # Reset the final display flag for each new conversation
+ self._final_display_shown = False
+
+ # Temporarily disable verbose mode to prevent console output conflicts during streaming
+ original_verbose = self.verbose
+ self.verbose = False
+
+ # For custom LLM path, use the new get_response_stream generator
+ if self._using_custom_llm:
+ # Handle knowledge search
+ actual_prompt = prompt
+ if self.knowledge:
+ search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+ if search_results:
+ if isinstance(search_results, dict) and 'results' in search_results:
+ knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+ else:
+ knowledge_content = "\n".join(search_results)
+ actual_prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
+
+ # Handle tools properly
+ tools = kwargs.get('tools', self.tools)
+ if tools is None or (isinstance(tools, list) and len(tools) == 0):
+ tool_param = self.tools
+ else:
+ tool_param = tools
+
+ # Convert MCP tools if needed
+ if tool_param is not None:
+ from ..mcp.mcp import MCP
+ if isinstance(tool_param, MCP) and hasattr(tool_param, 'to_openai_tool'):
+ openai_tool = tool_param.to_openai_tool()
+ if openai_tool:
+ if isinstance(openai_tool, list):
+ tool_param = openai_tool
+ else:
+ tool_param = [openai_tool]
+
+ # Store chat history length for potential rollback
+ chat_history_length = len(self.chat_history)
+
+ # Normalize prompt content for chat history
+ normalized_content = actual_prompt
+ if isinstance(actual_prompt, list):
+ normalized_content = next((item["text"] for item in actual_prompt if item.get("type") == "text"), "")
+
+ # Prevent duplicate messages in chat history
+ if not (self.chat_history and
+ self.chat_history[-1].get("role") == "user" and
+ self.chat_history[-1].get("content") == normalized_content):
+ self.chat_history.append({"role": "user", "content": normalized_content})
+
+ try:
+ # Use the new streaming generator from LLM class
+ response_content = ""
+ for chunk in self.llm_instance.get_response_stream(
+ prompt=actual_prompt,
+ system_prompt=self._build_system_prompt(tool_param),
+ chat_history=self.chat_history,
+ temperature=kwargs.get('temperature', 0.2),
+ tools=tool_param,
+ output_json=kwargs.get('output_json'),
+ output_pydantic=kwargs.get('output_pydantic'),
+ verbose=False, # Keep verbose false for streaming
+ markdown=self.markdown,
+ agent_name=self.name,
+ agent_role=self.role,
+ agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tool_param or [])],
+ task_name=kwargs.get('task_name'),
+ task_description=kwargs.get('task_description'),
+ task_id=kwargs.get('task_id'),
+ execute_tool_fn=self.execute_tool
+ ):
+ response_content += chunk
+ yield chunk
+
+ # Add complete response to chat history
+ if response_content:
+ self.chat_history.append({"role": "assistant", "content": response_content})
+
+ except Exception as e:
+ # Rollback chat history on error
+ self.chat_history = self.chat_history[:chat_history_length]
+ logging.error(f"Custom LLM streaming error: {e}")
+ raise
+
+ else:
+ # For OpenAI-style models, implement proper streaming without display
+ # Handle knowledge search
+ actual_prompt = prompt
+ if self.knowledge:
+ search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+ if search_results:
+ if isinstance(search_results, dict) and 'results' in search_results:
+ knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+ else:
+ knowledge_content = "\n".join(search_results)
+ actual_prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
+
+ # Handle tools properly
+ tools = kwargs.get('tools', self.tools)
+ if tools is None or (isinstance(tools, list) and len(tools) == 0):
+ tool_param = self.tools
+ else:
+ tool_param = tools
+
+ # Build messages using the helper method
+ messages, original_prompt = self._build_messages(actual_prompt, kwargs.get('temperature', 0.2),
+ kwargs.get('output_json'), kwargs.get('output_pydantic'))
+
+ # Store chat history length for potential rollback
+ chat_history_length = len(self.chat_history)
+
+ # Normalize original_prompt for consistent chat history storage
+ normalized_content = original_prompt
+ if isinstance(original_prompt, list):
+ normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
+
+ # Prevent duplicate messages in chat history
+ if not (self.chat_history and
+ self.chat_history[-1].get("role") == "user" and
+ self.chat_history[-1].get("content") == normalized_content):
+ self.chat_history.append({"role": "user", "content": normalized_content})
+
+ try:
+ # Check if OpenAI client is available
+ if self._openai_client is None:
+ raise ValueError("OpenAI client is not initialized. Please provide OPENAI_API_KEY or use a custom LLM provider.")
+
+ # Format tools for OpenAI
+ formatted_tools = self._format_tools_for_completion(tool_param)
+
+ # Create streaming completion directly without display function
+ completion_args = {
+ "model": self.llm,
+ "messages": messages,
+ "temperature": kwargs.get('temperature', 0.2),
+ "stream": True
+ }
+ if formatted_tools:
+ completion_args["tools"] = formatted_tools
+
+ completion = self._openai_client.sync_client.chat.completions.create(**completion_args)
+
+ # Stream the response chunks without display
+ response_text = ""
+ tool_calls_data = []
+
+ for chunk in completion:
+ delta = chunk.choices[0].delta
+
+ # Handle text content
+ if delta.content is not None:
+ chunk_content = delta.content
+ response_text += chunk_content
+ yield chunk_content
+
+ # Handle tool calls (accumulate but don't yield as chunks)
+ if hasattr(delta, 'tool_calls') and delta.tool_calls:
+ for tool_call_delta in delta.tool_calls:
+ # Extend tool_calls_data list to accommodate the tool call index
+ while len(tool_calls_data) <= tool_call_delta.index:
+ tool_calls_data.append({'id': '', 'function': {'name': '', 'arguments': ''}})
+
+ # Accumulate tool call data
+ if tool_call_delta.id:
+ tool_calls_data[tool_call_delta.index]['id'] = tool_call_delta.id
+ if tool_call_delta.function.name:
+ tool_calls_data[tool_call_delta.index]['function']['name'] = tool_call_delta.function.name
+ if tool_call_delta.function.arguments:
+ tool_calls_data[tool_call_delta.index]['function']['arguments'] += tool_call_delta.function.arguments
+
+ # Handle any tool calls that were accumulated
+ if tool_calls_data:
+ # Add assistant message with tool calls to chat history
+ assistant_message = {"role": "assistant", "content": response_text}
+ if tool_calls_data:
+ assistant_message["tool_calls"] = [
+ {
+ "id": tc['id'],
+ "type": "function",
+ "function": tc['function']
+ } for tc in tool_calls_data if tc['id']
+ ]
+ self.chat_history.append(assistant_message)
+
+ # Execute tool calls and add results to chat history
+ for tool_call in tool_calls_data:
+ if tool_call['id'] and tool_call['function']['name']:
+ try:
+ # Parse JSON arguments safely
+ try:
+ parsed_args = json.loads(tool_call['function']['arguments']) if tool_call['function']['arguments'] else {}
+ except json.JSONDecodeError as json_error:
+ logging.error(f"Failed to parse tool arguments as JSON: {json_error}")
+ parsed_args = {}
+
+ tool_result = self.execute_tool(
+ tool_call['function']['name'],
+ parsed_args
+ )
+ # Add tool result to chat history
+ self.chat_history.append({
+ "role": "tool",
+ "tool_call_id": tool_call['id'],
+ "content": str(tool_result)
+ })
+ except Exception as tool_error:
+ logging.error(f"Tool execution error in streaming: {tool_error}")
+ # Add error result to chat history
+ self.chat_history.append({
+ "role": "tool",
+ "tool_call_id": tool_call['id'],
+ "content": f"Error: {str(tool_error)}"
+ })
+ else:
+ # Add complete response to chat history (text-only response)
+ if response_text:
+ self.chat_history.append({"role": "assistant", "content": response_text})
+
+ except Exception as e:
+ # Rollback chat history on error
+ self.chat_history = self.chat_history[:chat_history_length]
+ logging.error(f"OpenAI streaming error: {e}")
+ # Fall back to simulated streaming
+ response = self.chat(prompt, **kwargs)
+ if response:
+ words = str(response).split()
+ chunk_size = max(1, len(words) // 20)
+ for i in range(0, len(words), chunk_size):
+ chunk_words = words[i:i + chunk_size]
+ chunk = ' '.join(chunk_words)
+ if i + chunk_size < len(words):
+ chunk += ' '
+ yield chunk
+
+ # Restore original verbose mode
+ self.verbose = original_verbose
+
+ except Exception as e:
+ # Restore verbose mode on any error
+ self.verbose = original_verbose
+ # Graceful fallback to non-streaming if streaming fails
+ logging.warning(f"Streaming failed, falling back to regular response: {e}")
+ response = self.chat(prompt, **kwargs)
+ if response:
+ yield response

  def execute(self, task, context=None):
  """Execute a task synchronously - backward compatibility method"""
@@ -1,19 +1,36 @@
  import logging
  import warnings
  import os
+ import re

  # Disable litellm telemetry before any imports
  os.environ["LITELLM_TELEMETRY"] = "False"

- # Suppress all relevant logs at module level - consistent with main __init__.py
- logging.getLogger("litellm").setLevel(logging.WARNING)
- logging.getLogger("openai").setLevel(logging.WARNING)
- logging.getLogger("httpx").setLevel(logging.WARNING)
- logging.getLogger("httpcore").setLevel(logging.WARNING)
- logging.getLogger("pydantic").setLevel(logging.WARNING)
+ # Check if warnings should be suppressed (consistent with main __init__.py)
+ def _should_suppress_warnings():
+ import sys
+ LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
+ return (LOGLEVEL != 'DEBUG' and
+ not hasattr(sys, '_called_from_test') and
+ 'pytest' not in sys.modules and
+ os.environ.get('PYTEST_CURRENT_TEST') is None)

- # Suppress pydantic warnings
- warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
+ # Suppress all relevant logs at module level - more aggressive suppression consistent with main __init__.py (only when not in DEBUG mode)
+ if _should_suppress_warnings():
+ logging.getLogger("litellm").setLevel(logging.CRITICAL)
+ logging.getLogger("openai").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.CRITICAL)
+ logging.getLogger("httpcore").setLevel(logging.CRITICAL)
+ logging.getLogger("pydantic").setLevel(logging.WARNING)
+
+ # Note: litellm child loggers automatically inherit the CRITICAL level from the parent logger
+
+ # Warning filters are centrally managed in the main __init__.py file
+ # Apply additional local suppression for safety during LLM imports (only when not in DEBUG mode)
+ if _should_suppress_warnings():
+ for module in ['litellm', 'httpx', 'httpcore', 'pydantic']:
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module=module)
+ warnings.filterwarnings("ignore", category=UserWarning, module=module)

  # Import after suppressing warnings
  from .llm import LLM, LLMContextLengthExceededException
@@ -40,12 +57,21 @@ from .model_router import (
  create_routing_agent
  )

- # Ensure telemetry is disabled after import as well
- try:
- import litellm
- litellm.telemetry = False
- except ImportError:
- pass
+ # Ensure comprehensive litellm configuration after import (only when not in DEBUG mode)
+ if _should_suppress_warnings():
+ try:
+ import litellm
+ # Disable all litellm logging and telemetry features
+ litellm.telemetry = False
+ litellm.drop_params = True
+ if hasattr(litellm, 'suppress_debug_info'):
+ litellm.suppress_debug_info = True
+ # Set all litellm loggers to CRITICAL level
+ if hasattr(litellm, '_logging_obj'):
+ litellm._logging_obj.setLevel(logging.CRITICAL)
+ # Note: Child loggers inherit from parent, no need to iterate over all loggers
+ except ImportError:
+ pass

  __all__ = [
  "LLM",