praisonaiagents 0.0.50__py3-none-any.whl → 0.0.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/agent/agent.py +186 -18
- praisonaiagents/llm/llm.py +318 -124
- {praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/METADATA +1 -1
- {praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/RECORD +6 -6
- {praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/top_level.txt +0 -0
praisonaiagents/agent/agent.py
CHANGED
@@ -21,10 +21,137 @@ from ..main import (
 )
 import inspect
 import uuid
+from dataclasses import dataclass
 
 if TYPE_CHECKING:
     from ..task.task import Task
 
+@dataclass
+class ChatCompletionMessage:
+    content: str
+    role: str = "assistant"
+    refusal: Optional[str] = None
+    audio: Optional[str] = None
+    function_call: Optional[dict] = None
+    tool_calls: Optional[List] = None
+    reasoning_content: Optional[str] = None
+
+@dataclass
+class Choice:
+    finish_reason: Optional[str]
+    index: int
+    message: ChatCompletionMessage
+    logprobs: Optional[dict] = None
+
+@dataclass
+class CompletionTokensDetails:
+    accepted_prediction_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+    reasoning_tokens: Optional[int] = None
+    rejected_prediction_tokens: Optional[int] = None
+
+@dataclass
+class PromptTokensDetails:
+    audio_tokens: Optional[int] = None
+    cached_tokens: int = 0
+
+@dataclass
+class CompletionUsage:
+    completion_tokens: int = 0
+    prompt_tokens: int = 0
+    total_tokens: int = 0
+    completion_tokens_details: Optional[CompletionTokensDetails] = None
+    prompt_tokens_details: Optional[PromptTokensDetails] = None
+    prompt_cache_hit_tokens: int = 0
+    prompt_cache_miss_tokens: int = 0
+
+@dataclass
+class ChatCompletion:
+    id: str
+    choices: List[Choice]
+    created: int
+    model: str
+    object: str = "chat.completion"
+    system_fingerprint: Optional[str] = None
+    service_tier: Optional[str] = None
+    usage: Optional[CompletionUsage] = None
+
+def process_stream_chunks(chunks):
+    """Process streaming chunks into combined response"""
+    if not chunks:
+        return None
+
+    try:
+        first_chunk = chunks[0]
+        last_chunk = chunks[-1]
+
+        # Basic metadata
+        id = getattr(first_chunk, "id", None)
+        created = getattr(first_chunk, "created", None)
+        model = getattr(first_chunk, "model", None)
+        system_fingerprint = getattr(first_chunk, "system_fingerprint", None)
+
+        # Track usage
+        completion_tokens = 0
+        prompt_tokens = 0
+
+        content_list = []
+        reasoning_list = []
+
+        for chunk in chunks:
+            if not hasattr(chunk, "choices") or not chunk.choices:
+                continue
+
+            # Track usage from each chunk
+            if hasattr(chunk, "usage"):
+                completion_tokens += getattr(chunk.usage, "completion_tokens", 0)
+                prompt_tokens += getattr(chunk.usage, "prompt_tokens", 0)
+
+            delta = getattr(chunk.choices[0], "delta", None)
+            if not delta:
+                continue
+
+            if hasattr(delta, "content") and delta.content:
+                content_list.append(delta.content)
+            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                reasoning_list.append(delta.reasoning_content)
+
+        combined_content = "".join(content_list) if content_list else ""
+        combined_reasoning = "".join(reasoning_list) if reasoning_list else None
+        finish_reason = getattr(last_chunk.choices[0], "finish_reason", None) if hasattr(last_chunk, "choices") and last_chunk.choices else None
+
+        message = ChatCompletionMessage(
+            content=combined_content,
+            reasoning_content=combined_reasoning
+        )
+
+        choice = Choice(
+            finish_reason=finish_reason,
+            index=0,
+            message=message
+        )
+
+        usage = CompletionUsage(
+            completion_tokens=completion_tokens,
+            prompt_tokens=prompt_tokens,
+            total_tokens=completion_tokens + prompt_tokens,
+            completion_tokens_details=CompletionTokensDetails(),
+            prompt_tokens_details=PromptTokensDetails()
+        )
+
+        return ChatCompletion(
+            id=id,
+            choices=[choice],
+            created=created,
+            model=model,
+            system_fingerprint=system_fingerprint,
+            usage=usage
+        )
+
+    except Exception as e:
+        print(f"Error processing chunks: {e}")
+        return None
+
 class Agent:
     def _generate_tool_definition(self, function_name):
         """
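Note on the hunk above: process_stream_chunks folds a list of streamed delta chunks into a single ChatCompletion-style object, accumulating content, reasoning_content and token usage. A minimal sketch of how it could be exercised with hand-built chunk objects; the SimpleNamespace stand-ins and the sample values are illustrative assumptions, only the attribute names mirror what the helper reads.

# Illustration only: fake chunks exposing the attributes the helper reads
# (id, created, model, system_fingerprint, choices[0].delta, usage).
from types import SimpleNamespace

from praisonaiagents.agent.agent import process_stream_chunks  # module shown in this diff

def make_chunk(content=None, reasoning=None, finish_reason=None):
    delta = SimpleNamespace(content=content, reasoning_content=reasoning)
    choice = SimpleNamespace(delta=delta, finish_reason=finish_reason)
    return SimpleNamespace(
        id="chatcmpl-demo", created=1736000000, model="example-model",
        system_fingerprint=None, choices=[choice],
        usage=SimpleNamespace(completion_tokens=1, prompt_tokens=0),
    )

chunks = [
    make_chunk(reasoning="weighing the question"),
    make_chunk(content="Hello"),
    make_chunk(content=" world", finish_reason="stop"),
]

completion = process_stream_chunks(chunks)
print(completion.choices[0].message.content)            # "Hello world"
print(completion.choices[0].message.reasoning_content)  # "weighing the question"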
@@ -190,7 +317,8 @@ class Agent:
         max_reflect: int = 3,
         min_reflect: int = 1,
         reflect_llm: Optional[str] = None,
-        user_id: Optional[str] = None
+        user_id: Optional[str] = None,
+        show_reasoning: bool = False
     ):
         # Add check at start if memory is requested
         if memory is not None:
@@ -298,6 +426,7 @@ Your Goal: {self.goal}
 
         # Store user_id
         self.user_id = user_id or "praison"
+        self.show_reasoning = show_reasoning
 
         # Check if knowledge parameter has any values
         if not knowledge:
@@ -399,7 +528,7 @@ Your Goal: {self.goal}
     def __str__(self):
         return f"Agent(name='{self.name}', role='{self.role}', goal='{self.goal}')"
 
-    def _chat_completion(self, messages, temperature=0.2, tools=None, stream=True):
+    def _chat_completion(self, messages, temperature=0.2, tools=None, stream=True, show_reasoning=False):
         start_time = time.time()
         logging.debug(f"{self.name} sending messages to LLM: {messages}")
 
@@ -469,30 +598,35 @@ Your Goal: {self.goal}
                     stream=True
                 )
                 full_response_text = ""
+                reasoning_content = ""
+                chunks = []
 
                 # Create Live display with proper configuration
                 with Live(
                     display_generating("", start_time),
                     console=self.console,
                     refresh_per_second=4,
-                    transient=True,
+                    transient=True,
                     vertical_overflow="ellipsis",
                     auto_refresh=True
                 ) as live:
                     for chunk in response_stream:
+                        chunks.append(chunk)
                         if chunk.choices[0].delta.content:
                             full_response_text += chunk.choices[0].delta.content
                             live.update(display_generating(full_response_text, start_time))
+
+                        # Update live display with reasoning content if enabled
+                        if show_reasoning and hasattr(chunk.choices[0].delta, "reasoning_content"):
+                            rc = chunk.choices[0].delta.reasoning_content
+                            if rc:
+                                reasoning_content += rc
+                                live.update(display_generating(f"{full_response_text}\n[Reasoning: {reasoning_content}]", start_time))
 
                 # Clear the last generating display with a blank line
                 self.console.print()
 
-                final_response =
-                    model=self.llm,
-                    messages=messages,
-                    temperature=temperature,
-                    stream=False
-                )
+                final_response = process_stream_chunks(chunks)
                 return final_response
             else:
                 if tool_calls:
@@ -510,7 +644,8 @@ Your Goal: {self.goal}
             display_error(f"Error in chat completion: {e}")
             return None
 
-    def chat(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None):
+    def chat(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, show_reasoning=False):
+        show_reasoning = show_reasoning or self.show_reasoning
         # Search for existing knowledge if any knowledge is provided
         if self.knowledge:
             search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
@@ -546,7 +681,8 @@ Your Goal: {self.goal}
                 agent_name=self.name,
                 agent_role=self.role,
                 agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in self.tools],
-                execute_tool_fn=self.execute_tool # Pass tool execution function
+                execute_tool_fn=self.execute_tool, # Pass tool execution function
+                show_reasoning=show_reasoning
             )
 
             self.chat_history.append({"role": "user", "content": prompt})
@@ -616,7 +752,7 @@ Your Goal: {self.goal}
                     agent_tools=agent_tools
                 )
 
-                response = self._chat_completion(messages, temperature=temperature, tools=tools if tools else None)
+                response = self._chat_completion(messages, temperature=temperature, tools=tools if tools else None, show_reasoning=show_reasoning)
                 if not response:
                     return None
 
@@ -749,8 +885,9 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
             cleaned = cleaned[:-3].strip()
         return cleaned
 
-    async def achat(self, prompt: str, temperature=0.2, tools=None, output_json=None, output_pydantic=None):
+    async def achat(self, prompt: str, temperature=0.2, tools=None, output_json=None, output_pydantic=None, show_reasoning=False):
         """Async version of chat method. TODO: Requires Syncing with chat method."""
+        show_reasoning = show_reasoning or self.show_reasoning
         try:
             # Search for existing knowledge if any knowledge is provided
             if self.knowledge:
@@ -781,7 +918,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 agent_name=self.name,
                 agent_role=self.role,
                 agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in self.tools],
-                execute_tool_fn=self.execute_tool_async
+                execute_tool_fn=self.execute_tool_async,
+                show_reasoning=show_reasoning
             )
 
             self.chat_history.append({"role": "user", "content": prompt})
@@ -896,7 +1034,7 @@ Your Goal: {self.goal}
             display_error(f"Error in achat: {e}")
             return None
 
-    async def _achat_completion(self, response, tools):
+    async def _achat_completion(self, response, tools, show_reasoning=False):
         """Async version of _chat_completion method"""
         try:
             message = response.choices[0].message
@@ -942,9 +1080,39 @@ Your Goal: {self.goal}
             final_response = await async_client.chat.completions.create(
                 model=self.llm,
                 messages=messages,
-                temperature=0.2
+                temperature=0.2,
+                stream=True
             )
-
+            full_response_text = ""
+            reasoning_content = ""
+            chunks = []
+            start_time = time.time()
+
+            with Live(
+                display_generating("", start_time),
+                console=self.console,
+                refresh_per_second=4,
+                transient=True,
+                vertical_overflow="ellipsis",
+                auto_refresh=True
+            ) as live:
+                async for chunk in final_response:
+                    chunks.append(chunk)
+                    if chunk.choices[0].delta.content:
+                        full_response_text += chunk.choices[0].delta.content
+                        live.update(display_generating(full_response_text, start_time))
+
+                    if show_reasoning and hasattr(chunk.choices[0].delta, "reasoning_content"):
+                        rc = chunk.choices[0].delta.reasoning_content
+                        if rc:
+                            reasoning_content += rc
+                            live.update(display_generating(f"{full_response_text}\n[Reasoning: {reasoning_content}]", start_time))
+
+            self.console.print()
+
+            final_response = process_stream_chunks(chunks)
+            return final_response.choices[0].message.content if final_response else full_response_text
+
         except Exception as e:
             display_error(f"Error in final chat completion: {e}")
             return formatted_results
@@ -952,7 +1120,7 @@ Your Goal: {self.goal}
                 return None
         except Exception as e:
             display_error(f"Error in _achat_completion: {e}")
-            return None
+            return None
 
     async def astart(self, prompt: str, **kwargs):
         """Async version of start method"""
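Taken together, the agent.py changes thread a show_reasoning flag from the constructor through chat()/achat(), where it is OR-ed with the instance flag and forwarded to _chat_completion. A rough usage sketch under the assumption that the other constructor arguments behave as in 0.0.50; the model name is only an example of a provider that streams reasoning_content.

from praisonaiagents import Agent

agent = Agent(
    name="Researcher",
    role="Analyst",
    goal="Answer questions and surface the model's reasoning",
    llm="deepseek/deepseek-reasoner",  # example reasoning-capable model
    show_reasoning=True,               # new in 0.0.52: default for every call
)

# chat() also accepts the flag per call; either flag being True enables it
answer = agent.chat("Why is the sky blue?", show_reasoning=True)
print(answer)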
praisonaiagents/llm/llm.py
CHANGED
@@ -113,6 +113,7 @@ class LLM:
             litellm.callbacks = []
             # Additional logging suppression
             litellm.suppress_debug_messages = True
+            litellm._logging._disable_debugging()
             logging.getLogger("litellm.utils").setLevel(logging.WARNING)
             logging.getLogger("litellm.main").setLevel(logging.WARNING)
         except ImportError:
@@ -147,6 +148,7 @@ class LLM:
         self.self_reflect = extra_settings.get('self_reflect', False)
         self.max_reflect = extra_settings.get('max_reflect', 3)
         self.min_reflect = extra_settings.get('min_reflect', 1)
+        self.show_reasoning = extra_settings.get('show_reasoning', False)
 
         # Enable error dropping for cleaner output
         litellm.drop_params = True
@@ -176,7 +178,7 @@ class LLM:
         """Enhanced get_response with all OpenAI-like features"""
         try:
             import litellm
-
+            show_reasoning = kwargs.get('show_reasoning', self.show_reasoning)
             # Disable litellm debug messages
             litellm.set_verbose = False
 
@@ -230,8 +232,55 @@ class LLM:
 
             # Get response from LiteLLM
            start_time = time.time()
-
-
+
+            # If show_reasoning is True, do a single non-streaming call
+            if show_reasoning:
+                resp = litellm.completion(
+                    model=self.model,
+                    messages=messages,
+                    temperature=temperature,
+                    stream=False, # force non-streaming
+                    **kwargs
+                )
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
+
+                # Optionally display reasoning if present
+                if verbose and reasoning_content:
+                    display_interaction(
+                        original_prompt,
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                else:
+                    display_interaction(
+                        original_prompt,
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+
+            # Otherwise do the existing streaming approach
+            else:
+                if verbose:
+                    with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                        response_text = ""
+                        for chunk in litellm.completion(
+                            model=self.model,
+                            messages=messages,
+                            temperature=temperature,
+                            stream=True,
+                            **kwargs
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                content = chunk.choices[0].delta.content
+                                response_text += content
+                                live.update(display_generating(response_text, start_time))
+                else:
+                    # Non-verbose mode, just collect the response
                    response_text = ""
                    for chunk in litellm.completion(
                        model=self.model,
@@ -241,23 +290,9 @@ class LLM:
                        **kwargs
                    ):
                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-
-                            response_text += content
-                            live.update(display_generating(response_text, start_time))
-            else:
-                # Non-verbose mode, just collect the response
-                response_text = ""
-                for chunk in litellm.completion(
-                    model=self.model,
-                    messages=messages,
-                    temperature=temperature,
-                    stream=True,
-                    **kwargs
-                ):
-                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                        response_text += chunk.choices[0].delta.content
+                            response_text += chunk.choices[0].delta.content
 
-
+            response_text = response_text.strip()
 
             # Get final completion to check for tool calls
             final_response = litellm.completion(
@@ -302,9 +337,53 @@ class LLM:
                         "content": "Function returned an empty output"
                     })
 
-                #
-                if
-
+                # If show_reasoning is True, do a single non-streaming call
+                if show_reasoning:
+                    resp = litellm.completion(
+                        model=self.model,
+                        messages=messages,
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        **kwargs
+                    )
+                    reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                    response_text = resp["choices"][0]["message"]["content"]
+
+                    # Optionally display reasoning if present
+                    if verbose and reasoning_content:
+                        display_interaction(
+                            original_prompt,
+                            f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                    else:
+                        display_interaction(
+                            original_prompt,
+                            response_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+
+                # Otherwise do the existing streaming approach
+                else:
+                    # Get response after tool calls with streaming
+                    if verbose:
+                        with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                            response_text = ""
+                            for chunk in litellm.completion(
+                                model=self.model,
+                                messages=messages,
+                                temperature=temperature,
+                                stream=True
+                            ):
+                                if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                    content = chunk.choices[0].delta.content
+                                    response_text += content
+                                    live.update(display_generating(response_text, start_time))
+                    else:
                        response_text = ""
                        for chunk in litellm.completion(
                            model=self.model,
@@ -313,21 +392,9 @@ class LLM:
                            stream=True
                        ):
                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-
-                                response_text += content
-                                live.update(display_generating(response_text, start_time))
-                else:
-                    response_text = ""
-                    for chunk in litellm.completion(
-                        model=self.model,
-                        messages=messages,
-                        temperature=temperature,
-                        stream=True
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                            response_text += chunk.choices[0].delta.content
+                                response_text += chunk.choices[0].delta.content
 
-
+                response_text = response_text.strip()
 
                 # Handle output formatting
                 if output_json or output_pydantic:
@@ -357,32 +424,66 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     {"role": "user", "content": reflection_prompt}
                 ]
 
-                #
-                if
-
+                # If show_reasoning is True, do a single non-streaming call to capture reasoning
+                if show_reasoning:
+                    reflection_resp = litellm.completion(
+                        model=self.model,
+                        messages=reflection_messages,
+                        temperature=temperature,
+                        stream=False, # Force non-streaming
+                        response_format={"type": "json_object"},
+                        **kwargs
+                    )
+                    # Grab reflection text and optional reasoning
+                    reasoning_content = reflection_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                    reflection_text = reflection_resp["choices"][0]["message"]["content"]
+
+                    # Optionally display reasoning if present
+                    if verbose and reasoning_content:
+                        display_interaction(
+                            "Reflection reasoning:",
+                            f"{reasoning_content}\n\nReflection result:\n{reflection_text}",
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                    elif verbose:
+                        display_interaction(
+                            "Self-reflection (non-streaming):",
+                            reflection_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                else:
+                    # Existing streaming approach
+                    if verbose:
+                        with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                            reflection_text = ""
+                            for chunk in litellm.completion(
+                                model=self.model,
+                                messages=reflection_messages,
+                                temperature=temperature,
+                                stream=True,
+                                response_format={"type": "json_object"},
+                                **kwargs
+                            ):
+                                if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                    content = chunk.choices[0].delta.content
+                                    reflection_text += content
+                                    live.update(display_generating(reflection_text, start_time))
+                    else:
                        reflection_text = ""
                        for chunk in litellm.completion(
                            model=self.model,
                            messages=reflection_messages,
                            temperature=temperature,
                            stream=True,
-                            response_format={"type": "json_object"}
+                            response_format={"type": "json_object"},
+                            **kwargs
                        ):
                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-
-                                reflection_text += content
-                                live.update(display_generating(reflection_text, start_time))
-                            else:
-                                reflection_text = ""
-                                for chunk in litellm.completion(
-                                    model=self.model,
-                                    messages=reflection_messages,
-                                    temperature=temperature,
-                                    stream=True,
-                                    response_format={"type": "json_object"}
-                                ):
-                                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                        reflection_text += chunk.choices[0].delta.content
+                                reflection_text += chunk.choices[0].delta.content
 
                 try:
                     reflection_data = json.loads(reflection_text)
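The recurring pattern in the llm.py hunks above: when show_reasoning is enabled, the call is forced to stream=False so the provider's reasoning can be read back from provider_specific_fields on the returned message. A standalone sketch of that pattern outside the LLM class; the model name and prompt are placeholders, and whether reasoning_content is populated depends on the provider LiteLLM routes to.

import litellm

messages = [{"role": "user", "content": "What is 17 * 24?"}]

# Non-streaming call so the whole message (including any provider-specific
# reasoning) arrives in one object, mirroring the show_reasoning branch.
resp = litellm.completion(
    model="deepseek/deepseek-reasoner",  # placeholder reasoning-capable model
    messages=messages,
    temperature=0.2,
    stream=False,
)

message = resp["choices"][0]["message"]
reasoning = message.get("provider_specific_fields", {}).get("reasoning_content")
answer = message["content"]

if reasoning:
    print(f"Reasoning:\n{reasoning}\n")
print(f"Answer:\n{answer}")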
@@ -453,6 +554,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         """Async version of get_response with identical functionality."""
         try:
             import litellm
+            show_reasoning = kwargs.get('show_reasoning', self.show_reasoning)
             litellm.set_verbose = False
 
             # Build messages list
@@ -490,10 +592,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
             # Format tools for LiteLLM
             formatted_tools = None
             if tools:
-                logging.
+                logging.debug(f"Starting tool formatting for {len(tools)} tools")
                 formatted_tools = []
                 for tool in tools:
-                    logging.
+                    logging.debug(f"Processing tool: {tool.__name__ if hasattr(tool, '__name__') else str(tool)}")
                     if hasattr(tool, '__name__'):
                         tool_name = tool.__name__
                         tool_doc = tool.__doc__ or "No description available"
@@ -539,10 +641,9 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             }
                         }
                         # Ensure tool definition is JSON serializable
-                        print(f"Generated tool definition: {tool_def}")
                         try:
                             json.dumps(tool_def) # Test serialization
-                            logging.
+                            logging.debug(f"Generated tool definition: {tool_def}")
                             formatted_tools.append(tool_def)
                         except TypeError as e:
                             logging.error(f"Tool definition not JSON serializable: {e}")
@@ -552,38 +653,67 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
             if formatted_tools:
                 try:
                     json.dumps(formatted_tools) # Final serialization check
-                    logging.
+                    logging.debug(f"Final formatted tools: {json.dumps(formatted_tools, indent=2)}")
                 except TypeError as e:
                     logging.error(f"Final tools list not JSON serializable: {e}")
                     formatted_tools = None
 
             response_text = ""
-            if
-            #
-
-            # ----------------------------------------------------
-            async for chunk in await litellm.acompletion(
+            if show_reasoning:
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
                     model=self.model,
                     messages=messages,
                     temperature=temperature,
-                    stream=
+                    stream=False, # force non-streaming
                     **kwargs
-            )
-
-
-
-
+                )
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
+
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Initial reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Initial response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
            else:
-
-
-
-
-
-
-
-
-
-
+                if verbose:
+                    # ----------------------------------------------------
+                    # 1) Make the streaming call WITHOUT tools
+                    # ----------------------------------------------------
+                    async for chunk in await litellm.acompletion(
+                        model=self.model,
+                        messages=messages,
+                        temperature=temperature,
+                        stream=True,
+                        **kwargs
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            response_text += chunk.choices[0].delta.content
+                            print("\033[K", end="\r")
+                            print(f"Generating... {time.time() - start_time:.1f}s", end="\r")
+                else:
+                    # Non-verbose streaming call, still no tools
+                    async for chunk in await litellm.acompletion(
+                        model=self.model,
+                        messages=messages,
+                        temperature=temperature,
+                        stream=True,
+                        **kwargs
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            response_text += chunk.choices[0].delta.content
 
             response_text = response_text.strip()
 
@@ -634,35 +764,66 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         "content": "Function returned an empty output"
                     })
 
-                # Get response after tool calls
+                # Get response after tool calls
                 response_text = ""
-                if
-
+                if show_reasoning:
+                    # Non-streaming call to capture reasoning
+                    resp = await litellm.acompletion(
                        model=self.model,
                        messages=messages,
                        temperature=temperature,
-                        stream=
-                        tools=formatted_tools,
+                        stream=False, # force non-streaming
+                        tools=formatted_tools, # Include tools
                        **kwargs
-                )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    )
+                    reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                    response_text = resp["choices"][0]["message"]["content"]
+
+                    if verbose and reasoning_content:
+                        display_interaction(
+                            "Tool response reasoning:",
+                            f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                    elif verbose:
+                        display_interaction(
+                            "Tool response:",
+                            response_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                else:
+                    # Get response after tool calls with streaming
+                    if verbose:
+                        async for chunk in await litellm.acompletion(
+                            model=self.model,
+                            messages=messages,
+                            temperature=temperature,
+                            stream=True,
+                            tools=formatted_tools,
+                            **kwargs
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                content = chunk.choices[0].delta.content
+                                response_text += content
+                                print("\033[K", end="\r")
+                                print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+                    else:
+                        response_text = ""
+                        for chunk in litellm.completion(
+                            model=self.model,
+                            messages=messages,
+                            temperature=temperature,
+                            stream=True,
+                            **kwargs
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                response_text += chunk.choices[0].delta.content
 
-
+                response_text = response_text.strip()
 
                 # Handle output formatting
                 if output_json or output_pydantic:
@@ -692,33 +853,66 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     {"role": "user", "content": reflection_prompt}
                 ]
 
-                #
-
-
-                async for chunk in await litellm.acompletion(
+                # If show_reasoning is True, do a single non-streaming call to capture reasoning
+                if show_reasoning:
+                    reflection_resp = litellm.completion(
                        model=self.model,
                        messages=reflection_messages,
                        temperature=temperature,
-                        stream=
+                        stream=False, # Force non-streaming
                        response_format={"type": "json_object"},
                        **kwargs
-                )
-
-
-
-
-
+                    )
+                    # Grab reflection text and optional reasoning
+                    reasoning_content = reflection_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                    reflection_text = reflection_resp["choices"][0]["message"]["content"]
+
+                    # Optionally display reasoning if present
+                    if verbose and reasoning_content:
+                        display_interaction(
+                            "Reflection reasoning:",
+                            f"{reasoning_content}\n\nReflection result:\n{reflection_text}",
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                    elif verbose:
+                        display_interaction(
+                            "Self-reflection (non-streaming):",
+                            reflection_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
                else:
-
-
-
-
-
-
-
-
-
-
+                    # Existing streaming approach
+                    if verbose:
+                        with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                            reflection_text = ""
+                            for chunk in litellm.completion(
+                                model=self.model,
+                                messages=reflection_messages,
+                                temperature=temperature,
+                                stream=True,
+                                response_format={"type": "json_object"},
+                                **kwargs
+                            ):
+                                if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                    content = chunk.choices[0].delta.content
+                                    reflection_text += content
+                                    live.update(display_generating(reflection_text, start_time))
+                    else:
+                        reflection_text = ""
+                        for chunk in litellm.completion(
+                            model=self.model,
+                            messages=reflection_messages,
+                            temperature=temperature,
+                            stream=True,
+                            response_format={"type": "json_object"},
+                            **kwargs
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                reflection_text += chunk.choices[0].delta.content
 
                 while True: # Add loop for reflection handling
                     try:
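For contrast with the show_reasoning branch, the async hunks keep the default path streaming: litellm.acompletion(..., stream=True) is awaited to obtain an async iterator of delta chunks whose content is concatenated. A self-contained sketch of that collection loop; the model name and prompt are placeholders.

import asyncio
import litellm

async def stream_answer(prompt: str) -> str:
    """Collect a streamed answer the way the async branches above do."""
    response_text = ""
    async for chunk in await litellm.acompletion(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        stream=True,
    ):
        # Guard against keep-alive chunks without choices or content
        if chunk and chunk.choices and chunk.choices[0].delta.content:
            response_text += chunk.choices[0].delta.content
    return response_text.strip()

print(asyncio.run(stream_answer("Summarise the 0.0.52 changes in one line.")))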
{praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 praisonaiagents/__init__.py,sha256=JtPibbmeFv3meIb3vkKjckB0p7m-Vqt2RYPwOH8P41k,1228
 praisonaiagents/main.py,sha256=0kB9gn9meXtr4EIrdgA2lAioKIHCRJ61audsGDwuTm4,14428
 praisonaiagents/agent/__init__.py,sha256=sKO8wGEXvtCrvV1e834r1Okv0XAqAxqZCqz6hKLiTvA,79
-praisonaiagents/agent/agent.py,sha256=
+praisonaiagents/agent/agent.py,sha256=9r9eN9sTI3A_3IZdA4GYpsKXE5Q4m8yQ_QXGyFirQok,53844
 praisonaiagents/agents/__init__.py,sha256=_1d6Pqyk9EoBSo7E68sKyd1jDRlN1vxvVIRpoMc0Jcw,168
 praisonaiagents/agents/agents.py,sha256=PRqBEUqRadVLBoDd-tgne5fVB87bR6P9qOgvDdjS-dY,37028
 praisonaiagents/agents/autoagents.py,sha256=bjC2O5oZmoJItJXIMPTWc2lsp_AJC9tMiTQOal2hwPA,13532
@@ -9,7 +9,7 @@ praisonaiagents/knowledge/__init__.py,sha256=xL1Eh-a3xsHyIcU4foOWF-JdWYIYBALJH9b
 praisonaiagents/knowledge/chunking.py,sha256=FzoNY0q8MkvG4gADqk4JcRhmH3lcEHbRdonDgitQa30,6624
 praisonaiagents/knowledge/knowledge.py,sha256=fQNREDiwdoisfIxJBLVkteXgq_8Gbypfc3UaZbxf5QY,13210
 praisonaiagents/llm/__init__.py,sha256=ttPQQJQq6Tah-0updoEXDZFKWtJAM93rBWRoIgxRWO8,689
-praisonaiagents/llm/llm.py,sha256=
+praisonaiagents/llm/llm.py,sha256=WEfqWEOb2Sa2V5MRVa2XbFCBcrE5WBMeEhSFq3HCZvM,49145
 praisonaiagents/memory/memory.py,sha256=I8dOTkrl1i-GgQbDcrFOsSruzJ7MiI6Ys37DK27wrUs,35537
 praisonaiagents/process/__init__.py,sha256=lkYbL7Hn5a0ldvJtkdH23vfIIZLIcanK-65C0MwaorY,52
 praisonaiagents/process/process.py,sha256=_1Nk37kOYakPaUWAJff86rP0ENyykXqMnhTp8E0efuE,30802
@@ -35,7 +35,7 @@ praisonaiagents/tools/wikipedia_tools.py,sha256=pGko-f33wqXgxJTv8db7TbizY5XnzBQR
 praisonaiagents/tools/xml_tools.py,sha256=iYTMBEk5l3L3ryQ1fkUnNVYK-Nnua2Kx2S0dxNMMs1A,17122
 praisonaiagents/tools/yaml_tools.py,sha256=uogAZrhXV9O7xvspAtcTfpKSQYL2nlOTvCQXN94-G9A,14215
 praisonaiagents/tools/yfinance_tools.py,sha256=s2PBj_1v7oQnOobo2fDbQBACEHl61ftG4beG6Z979ZE,8529
-praisonaiagents-0.0.
-praisonaiagents-0.0.
-praisonaiagents-0.0.
-praisonaiagents-0.0.
+praisonaiagents-0.0.52.dist-info/METADATA,sha256=NIkU1FNQXms13nIStb4r2yYjItL1aA0XSL7sCy1HxMM,830
+praisonaiagents-0.0.52.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+praisonaiagents-0.0.52.dist-info/top_level.txt,sha256=_HsRddrJ23iDx5TTqVUVvXG2HeHBL5voshncAMDGjtA,16
+praisonaiagents-0.0.52.dist-info/RECORD,,
{praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/WHEEL
File without changes

{praisonaiagents-0.0.50.dist-info → praisonaiagents-0.0.52.dist-info}/top_level.txt
File without changes