praisonaiagents 0.0.127__py3-none-any.whl → 0.0.129__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
  import logging
  import os
  import warnings
+ import re
  from typing import Any, Dict, List, Optional, Union, Literal, Callable
  from pydantic import BaseModel
  import time
@@ -87,6 +88,10 @@ class LLM:
  "llama-3.2-90b-text-preview": 6144 # 8,192 actual
  }

+ # Ollama-specific prompt constants
+ OLLAMA_TOOL_USAGE_PROMPT = "Please analyze the request and use the available tools to help answer the question. Start by identifying what information you need."
+ OLLAMA_FINAL_ANSWER_PROMPT = "Based on the tool results above, please provide the final answer to the original question."
+
  def _log_llm_config(self, method_name: str, **config):
  """Centralized debug logging for LLM configuration and parameters.

@@ -277,15 +282,32 @@ class LLM:
  # Direct ollama/ prefix
  if self.model.startswith("ollama/"):
  return True
+
+ # Check base_url if provided
+ if self.base_url and "ollama" in self.base_url.lower():
+ return True

  # Check environment variables for Ollama base URL
  base_url = os.getenv("OPENAI_BASE_URL", "")
  api_base = os.getenv("OPENAI_API_BASE", "")

- # Common Ollama endpoints
- ollama_endpoints = ["localhost:11434", "127.0.0.1:11434", ":11434"]
+ # Common Ollama endpoints (including custom ports)
+ if any(url and ("ollama" in url.lower() or ":11434" in url)
+ for url in [base_url, api_base, self.base_url or ""]):
+ return True

- return any(endpoint in base_url or endpoint in api_base for endpoint in ollama_endpoints)
+ return False
+
+ def _format_ollama_tool_result_message(self, function_name: str, tool_result: Any) -> Dict[str, str]:
+ """
+ Format tool result message for Ollama provider.
+ Simplified approach without hardcoded regex extraction.
+ """
+ tool_result_str = str(tool_result)
+ return {
+ "role": "user",
+ "content": f"The {function_name} function returned: {tool_result_str}"
+ }

  def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
  """
@@ -422,13 +444,22 @@ class LLM:
  """
  messages = []

+ # Check if this is a Gemini model that supports native structured outputs
+ is_gemini_with_structured_output = False
+ if output_json or output_pydantic:
+ from .model_capabilities import supports_structured_outputs
+ is_gemini_with_structured_output = (
+ self._is_gemini_model() and
+ supports_structured_outputs(self.model)
+ )
+
  # Handle system prompt
  if system_prompt:
- # Append JSON schema if needed
- if output_json:
- system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_json.model_json_schema())}"
- elif output_pydantic:
- system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_pydantic.model_json_schema())}"
+ # Only append JSON schema for non-Gemini models or Gemini models without structured output support
+ if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+ schema_model = output_json or output_pydantic
+ if schema_model and hasattr(schema_model, 'model_json_schema'):
+ system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(schema_model.model_json_schema())}"

  # Skip system messages for legacy o1 models as they don't support them
  if not self._needs_system_message_skip():
@@ -440,7 +471,8 @@ class LLM:

  # Handle prompt modifications for JSON output
  original_prompt = prompt
- if output_json or output_pydantic:
+ if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+ # Only modify prompt for non-Gemini models
  if isinstance(prompt, str):
  prompt = prompt + "\nReturn ONLY a valid JSON object. No other text or explanation."
  elif isinstance(prompt, list):
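
As a rough illustration of the fallback path above, where the Pydantic JSON schema is appended to the system prompt for models without native structured-output support, here is a self-contained sketch; the `Answer` model is invented for the example and is not part of the package:

    import json
    from pydantic import BaseModel

    class Answer(BaseModel):  # example model, stands in for output_json / output_pydantic
        title: str
        score: float

    system_prompt = "You are a helpful assistant."
    schema_model = Answer
    if hasattr(schema_model, "model_json_schema"):
        system_prompt += (
            "\nReturn ONLY a JSON object that matches this Pydantic model: "
            f"{json.dumps(schema_model.model_json_schema())}"
        )
    print(system_prompt)

Gemini models that pass `supports_structured_outputs` skip this prompt mutation and rely on the native schema parameters instead.
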
@@ -660,6 +692,7 @@ class LLM:

  start_time = time.time()
  reflection_count = 0
+ interaction_displayed = False # Track if interaction has been displayed

  # Display initial instruction once
  if verbose:
@@ -695,6 +728,8 @@ class LLM:
  temperature=temperature,
  stream=False, # force non-streaming
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -703,7 +738,7 @@ class LLM:
  final_response = resp

  # Optionally display reasoning if present
- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  original_prompt,
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -711,7 +746,8 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
- else:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  original_prompt,
  response_text,
@@ -719,6 +755,7 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
+ interaction_displayed = True

  # Otherwise do the existing streaming approach
  else:
@@ -741,6 +778,8 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -760,6 +799,8 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -772,7 +813,7 @@ class LLM:
  if formatted_tools and self._supports_streaming_tools():
  tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)

- response_text = response_text.strip() if response_text else "" if response_text else "" if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # Create a mock final_response with the captured data
  final_response = {
@@ -791,12 +832,14 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=False,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  )
  response_text = final_response["choices"][0]["message"]["content"]

- if verbose:
+ if verbose and not interaction_displayed:
  # Display the complete response at once
  display_interaction(
  original_prompt,
@@ -805,18 +848,37 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
+ interaction_displayed = True

  tool_calls = final_response["choices"][0]["message"].get("tool_calls")

+ # For Ollama, if response is empty but we have tools, prompt for tool usage
+ if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+ messages.append({
+ "role": "user",
+ "content": self.OLLAMA_TOOL_USAGE_PROMPT
+ })
+ iteration_count += 1
+ continue
+
  # Handle tool calls - Sequential tool calling logic
  if tool_calls and execute_tool_fn:
  # Convert tool_calls to a serializable format for all providers
  serializable_tool_calls = self._serialize_tool_calls(tool_calls)
- messages.append({
- "role": "assistant",
- "content": response_text,
- "tool_calls": serializable_tool_calls
- })
+ # Check if this is Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, only include role and content
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+ else:
+ # For other providers, include tool_calls
+ messages.append({
+ "role": "assistant",
+ "content": response_text,
+ "tool_calls": serializable_tool_calls
+ })

  should_continue = False
  tool_results = [] # Store all tool results
@@ -842,11 +904,17 @@ class LLM:
  logging.debug(f"[TOOL_EXEC_DEBUG] About to display tool call with message: {display_message}")
  display_tool_call(display_message, console=console)

- messages.append({
- "role": "tool",
- "tool_call_id": tool_call_id,
- "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
- })
+ # Check if this is Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, use user role and format as natural language
+ messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+ else:
+ # For other providers, use tool role with tool_call_id
+ messages.append({
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ })

  # Check if we should continue (for tools like sequential thinking)
  # This mimics the logic from agent.py lines 1004-1007
@@ -858,100 +926,12 @@ class LLM:
  iteration_count += 1
  continue

- # Special handling for Ollama models that don't automatically process tool results
- ollama_handled = False
- ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
-
- if ollama_params:
- # Get response based on streaming mode
- if stream:
- # Streaming approach
- if verbose:
- with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
- response_text = ""
- for chunk in litellm.completion(
- **self._build_completion_params(
- messages=ollama_params["follow_up_messages"],
- temperature=temperature,
- stream=True
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- content = chunk.choices[0].delta.content
- response_text += content
- live.update(display_generating(response_text, start_time))
- else:
- response_text = ""
- for chunk in litellm.completion(
- **self._build_completion_params(
- messages=ollama_params["follow_up_messages"],
- temperature=temperature,
- stream=True
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- response_text += chunk.choices[0].delta.content
- else:
- # Non-streaming approach
- resp = litellm.completion(
- **self._build_completion_params(
- messages=ollama_params["follow_up_messages"],
- temperature=temperature,
- stream=False
- )
- )
- response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
-
- # Set flag to indicate Ollama was handled
- ollama_handled = True
- final_response_text = response_text.strip() if response_text else ""
- logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
-
- # Display the response if we got one
- if final_response_text and verbose:
- display_interaction(
- ollama_params["original_prompt"],
- final_response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
-
- # Update messages and continue the loop instead of returning
- if final_response_text:
- # Update messages with the response to maintain conversation context
- messages.append({
- "role": "assistant",
- "content": final_response_text
- })
- # Continue the loop to check if more tools are needed
- iteration_count += 1
- continue
- else:
- logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
-
- # Handle reasoning_steps after tool execution if not already handled by Ollama
- if reasoning_steps and not ollama_handled:
- # Make a non-streaming call to capture reasoning content
- reasoning_resp = litellm.completion(
- **self._build_completion_params(
- messages=messages,
- temperature=temperature,
- stream=False, # force non-streaming
- **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
- )
- )
- reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
- response_text = reasoning_resp["choices"][0]["message"]["content"]
-
- # Store reasoning content for later use
- if reasoning_content:
- stored_reasoning_content = reasoning_content
-
- # Update messages with the response
+ # For Ollama, add explicit prompt if we need a final answer
+ if self._is_ollama_provider() and iteration_count > 0:
+ # Add an explicit prompt for Ollama to generate the final answer
  messages.append({
- "role": "assistant",
- "content": response_text
+ "role": "user",
+ "content": self.OLLAMA_FINAL_ANSWER_PROMPT
  })

  # After tool execution, continue the loop to check if more tools are needed
@@ -974,7 +954,7 @@ class LLM:
  return final_response_text

  # No tool calls were made in this iteration, return the response
- if verbose:
+ if verbose and not interaction_displayed:
  # If we have stored reasoning content from tool execution, display it
  if stored_reasoning_content:
  display_interaction(
@@ -992,6 +972,7 @@ class LLM:
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True

  response_text = response_text.strip() if response_text else ""

@@ -1003,15 +984,17 @@ class LLM:
  if output_json or output_pydantic:
  self.chat_history.append({"role": "user", "content": original_prompt})
  self.chat_history.append({"role": "assistant", "content": response_text})
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if not self_reflect:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  # Return reasoning content if reasoning_steps is True
  if reasoning_steps and stored_reasoning_content:
  return stored_reasoning_content
@@ -1040,6 +1023,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # Force non-streaming
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -1075,6 +1060,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1090,6 +1077,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1107,15 +1096,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  )

  if satisfactory and reflection_count >= min_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if reflection_count >= max_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  reflection_count += 1
@@ -1135,6 +1126,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1149,21 +1142,24 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
  if chunk and chunk.choices and chunk.choices[0].delta.content:
  response_text += chunk.choices[0].delta.content

- response_text = response_text.strip() if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""
  continue

  except json.JSONDecodeError:
  reflection_count += 1
  if reflection_count >= max_reflect:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text
  continue
  except Exception as e:
@@ -1171,9 +1167,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  return None

  # If we've exhausted reflection attempts
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  except Exception as error:
@@ -1185,6 +1182,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  total_time = time.time() - start_time
  logging.debug(f"get_response completed in {total_time:.2f} seconds")

+ def _is_gemini_model(self) -> bool:
+ """Check if the model is a Gemini model."""
+ if not self.model:
+ return False
+ return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
  async def get_response_async(
  self,
  prompt: Union[str, List[Dict]],
@@ -1273,6 +1276,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.

  start_time = time.time()
  reflection_count = 0
+ interaction_displayed = False # Track if interaction has been displayed

  # Format tools for LiteLLM using the shared helper
  formatted_tools = self._format_tools_for_litellm(tools)
@@ -1293,15 +1297,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  resp = await litellm.acompletion(
  **self._build_completion_params(
  messages=messages,
- temperature=temperature,
- stream=False, # force non-streaming
- **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
- )
+ temperature=temperature,
+ stream=False, # force non-streaming
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+ )
  )
  reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
  response_text = resp["choices"][0]["message"]["content"]

- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  "Initial reasoning:",
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1309,7 +1315,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
- elif verbose:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  "Initial response:",
  response_text,
@@ -1317,6 +1324,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True
  else:
  # Determine if we should use streaming based on tool support
  use_streaming = stream
@@ -1335,6 +1343,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=True,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1355,6 +1365,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=True,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1367,7 +1379,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  if formatted_tools and self._supports_streaming_tools():
  tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)

- response_text = response_text.strip() if response_text else "" if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # We already have tool_calls from streaming if supported
  # No need for a second API call!
@@ -1379,13 +1391,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
  response_text = tool_response.choices[0].message.get("content", "")
  tool_calls = tool_response.choices[0].message.get("tool_calls", [])

- if verbose:
+ if verbose and not interaction_displayed:
  # Display the complete response at once
  display_interaction(
  original_prompt,
@@ -1394,16 +1408,35 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True

+ # For Ollama, if response is empty but we have tools, prompt for tool usage
+ if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+ messages.append({
+ "role": "user",
+ "content": self.OLLAMA_TOOL_USAGE_PROMPT
+ })
+ iteration_count += 1
+ continue
+
  # Now handle tools if we have them (either from streaming or non-streaming)
  if tools and execute_tool_fn and tool_calls:
  # Convert tool_calls to a serializable format for all providers
  serializable_tool_calls = self._serialize_tool_calls(tool_calls)
- messages.append({
- "role": "assistant",
- "content": response_text,
- "tool_calls": serializable_tool_calls
- })
+ # Check if it's Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, only include role and content
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+ else:
+ # For other providers, include tool_calls
+ messages.append({
+ "role": "assistant",
+ "content": response_text,
+ "tool_calls": serializable_tool_calls
+ })

  tool_results = [] # Store all tool results
  for tool_call in tool_calls:
@@ -1421,77 +1454,31 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  else:
  display_message += "Function returned no output"
  display_tool_call(display_message, console=console)
+ # Check if it's Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, use user role and format as natural language
+ messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+ else:
+ # For other providers, use tool role with tool_call_id
+ messages.append({
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ })
+
+ # For Ollama, add explicit prompt if we need a final answer
+ if self._is_ollama_provider() and iteration_count > 0:
+ # Add an explicit prompt for Ollama to generate the final answer
  messages.append({
- "role": "tool",
- "tool_call_id": tool_call_id,
- "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ "role": "user",
+ "content": self.OLLAMA_FINAL_ANSWER_PROMPT
  })
-
+
  # Get response after tool calls
  response_text = ""

- # Special handling for Ollama models that don't automatically process tool results
- ollama_handled = False
- ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
-
- if ollama_params:
- # Get response with streaming
- if verbose:
- response_text = ""
- async for chunk in await litellm.acompletion(
- **self._build_completion_params(
- messages=ollama_params["follow_up_messages"],
- temperature=temperature,
- stream=stream
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- content = chunk.choices[0].delta.content
- response_text += content
- print("\033[K", end="\r")
- print(f"Processing results... {time.time() - start_time:.1f}s", end="\r")
- else:
- response_text = ""
- async for chunk in await litellm.acompletion(
- **self._build_completion_params(
- messages=ollama_params["follow_up_messages"],
- temperature=temperature,
- stream=stream
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- response_text += chunk.choices[0].delta.content
-
- # Set flag to indicate Ollama was handled
- ollama_handled = True
- final_response_text = response_text.strip()
- logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
-
- # Display the response if we got one
- if final_response_text and verbose:
- display_interaction(
- ollama_params["original_prompt"],
- final_response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
-
- # Store the response for potential final return
- if final_response_text:
- # Update messages with the response to maintain conversation context
- messages.append({
- "role": "assistant",
- "content": final_response_text
- })
- # Continue the loop to check if more tools are needed
- iteration_count += 1
- continue
- else:
- logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
-
- # If no special handling was needed or if it's not an Ollama model
- if reasoning_steps and not ollama_handled:
+ # If no special handling was needed
+ if reasoning_steps:
  # Non-streaming call to capture reasoning
  resp = await litellm.acompletion(
  **self._build_completion_params(
@@ -1499,13 +1486,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # force non-streaming
  tools=formatted_tools, # Include tools
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
  reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
  response_text = resp["choices"][0]["message"]["content"]

- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  "Tool response reasoning:",
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1513,7 +1502,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
- elif verbose:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  "Tool response:",
  response_text,
@@ -1521,7 +1511,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
- elif not ollama_handled:
+ interaction_displayed = True
+ else:
  # Get response after tool calls with streaming if not already handled
  if verbose:
  async for chunk in await litellm.acompletion(
@@ -1530,6 +1521,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1545,13 +1538,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=stream,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
  if chunk and chunk.choices and chunk.choices[0].delta.content:
  response_text += chunk.choices[0].delta.content

- response_text = response_text.strip() if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # After tool execution, update messages and continue the loop
  if response_text:
@@ -1578,9 +1573,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  if output_json or output_pydantic:
  self.chat_history.append({"role": "user", "content": original_prompt})
  self.chat_history.append({"role": "assistant", "content": response_text})
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if not self_reflect:
@@ -1588,7 +1584,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  display_text = final_response_text if final_response_text else response_text

  # Display with stored reasoning content if available
- if verbose:
+ if verbose and not interaction_displayed:
  if stored_reasoning_content:
  display_interaction(
  original_prompt,
@@ -1600,6 +1596,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  else:
  display_interaction(original_prompt, display_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True

  # Return reasoning content if reasoning_steps is True and we have it
  if reasoning_steps and stored_reasoning_content:
@@ -1627,6 +1624,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # Force non-streaming
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -1662,6 +1661,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1677,6 +1678,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1695,15 +1698,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  )

  if satisfactory and reflection_count >= min_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if reflection_count >= max_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  reflection_count += 1
@@ -1790,67 +1795,6 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.

  litellm.callbacks = events

- def _handle_ollama_model(self, response_text: str, tool_results: List[Any], messages: List[Dict], original_prompt: Union[str, List[Dict]]) -> Optional[Dict[str, Any]]:
- """
- Handle special Ollama model requirements when processing tool results.
-
- Args:
- response_text: The initial response text from the model
- tool_results: List of tool execution results
- messages: The conversation messages list
- original_prompt: The original user prompt
-
- Returns:
- Dict with follow-up parameters if Ollama needs special handling, None otherwise
- """
- if not self._is_ollama_provider() or not tool_results:
- return None
-
- # Check if the response is just a JSON tool call
- try:
- json_response = json.loads(response_text.strip() if response_text else "{}")
- if not (('name' in json_response or 'function' in json_response) and
- not any(word in response_text.lower() for word in ['summary', 'option', 'result', 'found'])):
- return None
-
- logging.debug("Detected Ollama returning only tool call JSON, preparing follow-up call to process results")
-
- # Extract the original user query from messages
- original_query = ""
- for msg in reversed(messages): # Look from the end to find the most recent user message
- if msg.get("role") == "user":
- content = msg.get("content", "")
- # Handle list content (multimodal)
- if isinstance(content, list):
- for item in content:
- if isinstance(item, dict) and item.get("type") == "text":
- original_query = item.get("text", "")
- break
- else:
- original_query = content
- if original_query:
- break
-
- # Create a shorter follow-up prompt with all tool results
- # If there's only one result, use it directly; otherwise combine them
- if len(tool_results) == 1:
- results_text = json.dumps(tool_results[0], indent=2)
- else:
- results_text = json.dumps(tool_results, indent=2)
-
- follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
- logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
- logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
-
- # Return parameters for follow-up call
- return {
- "follow_up_messages": [{"role": "user", "content": follow_up_prompt}],
- "original_prompt": original_prompt
- }
-
- except (json.JSONDecodeError, KeyError):
- # Not a JSON response or not a tool call format
- return None

  def _build_completion_params(self, **override_params) -> Dict[str, Any]:
  """Build parameters for litellm completion calls with all necessary config"""
@@ -1895,11 +1839,33 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  # Override with any provided parameters
  params.update(override_params)

+ # Handle structured output parameters
+ output_json = override_params.get('output_json')
+ output_pydantic = override_params.get('output_pydantic')
+
+ if output_json or output_pydantic:
+ # Always remove these from params as they're not native litellm parameters
+ params.pop('output_json', None)
+ params.pop('output_pydantic', None)
+
+ # Check if this is a Gemini model that supports native structured outputs
+ if self._is_gemini_model():
+ from .model_capabilities import supports_structured_outputs
+ schema_model = output_json or output_pydantic
+
+ if schema_model and hasattr(schema_model, 'model_json_schema') and supports_structured_outputs(self.model):
+ schema = schema_model.model_json_schema()
+
+ # Gemini uses response_mime_type and response_schema
+ params['response_mime_type'] = 'application/json'
+ params['response_schema'] = schema
+
+ logging.debug(f"Using Gemini native structured output with schema: {json.dumps(schema, indent=2)}")
+
  # Add tool_choice="auto" when tools are provided (unless already specified)
  if 'tools' in params and params['tools'] and 'tool_choice' not in params:
  # For Gemini models, use tool_choice to encourage tool usage
- # More comprehensive Gemini model detection
- if any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini']):
+ if self._is_gemini_model():
  try:
  import litellm
  # Check if model supports function calling before setting tool_choice
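
To tie the structured-output changes together, a hedged sketch of how the new branch in `_build_completion_params` maps a Pydantic model onto Gemini's `response_mime_type`/`response_schema` parameters; the `Answer` model and the `params` dict are illustrative only:

    import json
    from pydantic import BaseModel

    class Answer(BaseModel):  # illustrative model, not part of the package
        title: str
        score: float

    params = {"model": "gemini/gemini-1.5-flash", "output_pydantic": Answer}

    output_json = params.pop("output_json", None)
    output_pydantic = params.pop("output_pydantic", None)
    schema_model = output_json or output_pydantic
    if schema_model is not None and hasattr(schema_model, "model_json_schema"):
        schema = schema_model.model_json_schema()
        # Ask for JSON output and attach the schema, as the Gemini branch does
        params["response_mime_type"] = "application/json"
        params["response_schema"] = schema

    print(json.dumps(params["response_schema"], indent=2))

Non-Gemini models keep the existing behaviour: `output_json`/`output_pydantic` are stripped from the completion params and the JSON schema is injected into the prompt instead.
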