praisonaiagents 0.0.124__tar.gz → 0.0.126__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/PKG-INFO +2 -1
  2. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/__init__.py +24 -0
  3. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/agent.py +2 -1
  4. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/knowledge.py +0 -3
  5. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/__init__.py +6 -9
  6. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/llm.py +232 -196
  7. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/main.py +1 -18
  8. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/mcp.py +46 -8
  9. praisonaiagents-0.0.126/praisonaiagents/mcp/mcp_http_stream.py +466 -0
  10. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/mcp_sse.py +19 -2
  11. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/process/process.py +88 -3
  12. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/task/task.py +1 -0
  13. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/PKG-INFO +2 -1
  14. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/SOURCES.txt +5 -1
  15. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/requires.txt +1 -0
  16. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/pyproject.toml +3 -2
  17. praisonaiagents-0.0.126/tests/test_fix_comprehensive.py +75 -0
  18. praisonaiagents-0.0.126/tests/test_http_stream_basic.py +58 -0
  19. praisonaiagents-0.0.126/tests/test_validation_feedback.py +252 -0
  20. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/README.md +0 -0
  21. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/__init__.py +0 -0
  22. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/handoff.py +0 -0
  23. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/image_agent.py +0 -0
  24. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/__init__.py +0 -0
  25. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/agents.py +0 -0
  26. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/autoagents.py +0 -0
  27. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/approval.py +0 -0
  28. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/__init__.py +0 -0
  29. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/guardrail_result.py +0 -0
  30. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/llm_guardrail.py +0 -0
  31. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/__init__.py +0 -0
  32. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/chunking.py +0 -0
  33. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/model_capabilities.py +0 -0
  34. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/openai_client.py +0 -0
  35. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/__init__.py +0 -0
  36. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/memory/__init__.py +0 -0
  37. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/memory/memory.py +0 -0
  38. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/process/__init__.py +0 -0
  39. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/session.py +0 -0
  40. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/task/__init__.py +0 -0
  41. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/__init__.py +0 -0
  42. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/integration.py +0 -0
  43. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/telemetry.py +0 -0
  44. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/README.md +0 -0
  45. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/__init__.py +0 -0
  46. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/arxiv_tools.py +0 -0
  47. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/calculator_tools.py +0 -0
  48. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/csv_tools.py +0 -0
  49. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/duckdb_tools.py +0 -0
  50. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/duckduckgo_tools.py +0 -0
  51. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/excel_tools.py +0 -0
  52. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/file_tools.py +0 -0
  53. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/json_tools.py +0 -0
  54. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/newspaper_tools.py +0 -0
  55. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/pandas_tools.py +0 -0
  56. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/python_tools.py +0 -0
  57. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/searxng_tools.py +0 -0
  58. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/shell_tools.py +0 -0
  59. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/spider_tools.py +0 -0
  60. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/test.py +0 -0
  61. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/tools.py +0 -0
  62. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/train/data/generatecot.py +0 -0
  63. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/wikipedia_tools.py +0 -0
  64. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/xml_tools.py +0 -0
  65. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/yaml_tools.py +0 -0
  66. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/yfinance_tools.py +0 -0
  67. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/dependency_links.txt +0 -0
  68. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/top_level.txt +0 -0
  69. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/setup.cfg +0 -0
  70. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test-graph-memory.py +0 -0
  71. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test.py +0 -0
  72. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_handoff_compatibility.py +0 -0
  73. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_ollama_async_fix.py +0 -0
  74. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_ollama_fix.py +0 -0
  75. {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_posthog_fixed.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: praisonaiagents
- Version: 0.0.124
+ Version: 0.0.126
  Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
  Author: Mervin Praison
  Requires-Python: >=3.10
@@ -9,6 +9,7 @@ Requires-Dist: rich
  Requires-Dist: openai
  Requires-Dist: mcp>=1.6.0
  Requires-Dist: posthog>=3.0.0
+ Requires-Dist: aiohttp>=3.8.0
  Provides-Extra: mcp
  Requires-Dist: mcp>=1.6.0; extra == "mcp"
  Requires-Dist: fastapi>=0.115.0; extra == "mcp"
@@ -2,6 +2,30 @@
  Praison AI Agents - A package for hierarchical AI agent task execution
  """

+ # Configure logging before any other imports
+ import os
+ import logging
+ from rich.logging import RichHandler
+
+ # Get log level from environment variable
+ LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
+
+ # Configure root logger
+ logging.basicConfig(
+ level=getattr(logging, LOGLEVEL, logging.INFO),
+ format="%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s",
+ datefmt="[%X]",
+ handlers=[RichHandler(rich_tracebacks=True)]
+ )
+
+ # Suppress specific noisy loggers
+ logging.getLogger("litellm").setLevel(logging.WARNING)
+ logging.getLogger("litellm.utils").setLevel(logging.WARNING)
+ logging.getLogger("markdown_it").setLevel(logging.WARNING)
+ logging.getLogger("rich.markdown").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
+
  from .agent.agent import Agent
  from .agent.image_agent import ImageAgent
  from .agents.agents import PraisonAIAgents
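The `__init__.py` hunk above moves logging setup to import time: the root logger is configured from a `LOGLEVEL` environment variable (defaulting to INFO) with a RichHandler, while known-noisy loggers are pinned to WARNING. A minimal usage sketch assuming only what the hunk shows; the script itself is illustrative:

```python
# Illustrative sketch: opt into verbose package logging via LOGLEVEL.
# The variable must be set before the first import, because
# logging.basicConfig() now runs when praisonaiagents is imported.
import os

os.environ["LOGLEVEL"] = "DEBUG"

import praisonaiagents  # root logger -> RichHandler at DEBUG

# Per the hunk above, litellm/httpx/httpcore loggers stay at WARNING
# unless the application raises them explicitly.
```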
@@ -1235,7 +1235,8 @@ Your Goal: {self.goal}"""
  agent_role=self.role,
  agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
  execute_tool_fn=self.execute_tool, # Pass tool execution function
- reasoning_steps=reasoning_steps
+ reasoning_steps=reasoning_steps,
+ stream=stream # Pass the stream parameter from chat method
  )

  self.chat_history.append({"role": "assistant", "content": response_text})
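This `agent.py` hunk forwards the `stream` argument from the agent's chat path into the underlying LLM call, alongside `reasoning_steps`. A hypothetical call sketch; the constructor arguments below are assumed for illustration and are not part of this diff:

```python
# Hypothetical sketch: the stream flag given to chat() now reaches the LLM layer.
from praisonaiagents import Agent

agent = Agent(
    role="Research Assistant",          # assumed constructor arguments
    goal="Summarise recent AI papers",
)

# With this change, stream=False is passed through to the LLM call
# instead of being dropped at the agent level.
reply = agent.chat("Give me a one-paragraph summary", stream=False)
print(reply)
```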
@@ -67,9 +67,6 @@ class Knowledge:

  # Disable OpenAI API request logging
  logging.getLogger('openai').setLevel(logging.WARNING)
-
- # Set root logger to warning to catch any uncategorized logs
- logging.getLogger().setLevel(logging.WARNING)

  @cached_property
  def _deps(self):
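The `knowledge.py` hunk stops forcing the root logger back to WARNING, so the level chosen by the package's import-time config (or by the host application) is no longer silently overridden when a knowledge store is configured. A small sketch of the intent, not an exact reproduction of where the removed call ran:

```python
# Sketch: the application keeps control of the root log level.
import logging

from praisonaiagents.knowledge.knowledge import Knowledge  # noqa: F401  (class shown in the hunk above)

logging.getLogger().setLevel(logging.DEBUG)  # application-chosen level
logging.getLogger(__name__).debug(
    "Debug records are no longer dropped by a hidden root-logger override."
)
```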
@@ -5,19 +5,16 @@ import os
  # Disable litellm telemetry before any imports
  os.environ["LITELLM_TELEMETRY"] = "False"

- # Suppress all relevant logs at module level
- logging.getLogger("litellm").setLevel(logging.ERROR)
- logging.getLogger("openai").setLevel(logging.ERROR)
- logging.getLogger("httpx").setLevel(logging.ERROR)
- logging.getLogger("httpcore").setLevel(logging.ERROR)
- logging.getLogger("pydantic").setLevel(logging.ERROR)
+ # Suppress all relevant logs at module level - consistent with main __init__.py
+ logging.getLogger("litellm").setLevel(logging.WARNING)
+ logging.getLogger("openai").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
+ logging.getLogger("pydantic").setLevel(logging.WARNING)

  # Suppress pydantic warnings
  warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")

- # Configure logging to suppress all INFO messages
- logging.basicConfig(level=logging.WARNING)
-
  # Import after suppressing warnings
  from .llm import LLM, LLMContextLengthExceededException
  from .openai_client import (
@@ -680,6 +680,7 @@ class LLM:
  max_iterations = 10 # Prevent infinite loops
  iteration_count = 0
  final_response_text = ""
+ stored_reasoning_content = None # Store reasoning content from tool execution

  while iteration_count < max_iterations:
  try:
@@ -857,39 +858,49 @@
  iteration_count += 1
  continue

- # If we reach here, no more tool calls needed - get final response
- # Make one more call to get the final summary response
  # Special handling for Ollama models that don't automatically process tool results
  ollama_handled = False
  ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)

  if ollama_params:
- # Get response with streaming
- if verbose:
- with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+ # Get response based on streaming mode
+ if stream:
+ # Streaming approach
+ if verbose:
+ with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+ response_text = ""
+ for chunk in litellm.completion(
+ **self._build_completion_params(
+ messages=ollama_params["follow_up_messages"],
+ temperature=temperature,
+ stream=True
+ )
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta.content:
+ content = chunk.choices[0].delta.content
+ response_text += content
+ live.update(display_generating(response_text, start_time))
+ else:
  response_text = ""
  for chunk in litellm.completion(
  **self._build_completion_params(
  messages=ollama_params["follow_up_messages"],
  temperature=temperature,
- stream=stream
+ stream=True
  )
  ):
  if chunk and chunk.choices and chunk.choices[0].delta.content:
- content = chunk.choices[0].delta.content
- response_text += content
- live.update(display_generating(response_text, start_time))
+ response_text += chunk.choices[0].delta.content
  else:
- response_text = ""
- for chunk in litellm.completion(
+ # Non-streaming approach
+ resp = litellm.completion(
  **self._build_completion_params(
  messages=ollama_params["follow_up_messages"],
  temperature=temperature,
- stream=stream
+ stream=False
  )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- response_text += chunk.choices[0].delta.content
+ )
+ response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""

  # Set flag to indicate Ollama was handled
  ollama_handled = True
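The Ollama follow-up now branches on the caller's `stream` flag: the streaming path accumulates `delta.content` chunk by chunk, while the non-streaming path reads the whole message from a single response object using the same dict-style access the hunk itself uses. A minimal stand-alone sketch of that pattern; the model name and messages are placeholders, not values from the diff:

```python
# Minimal sketch of stream-aware completion handling (placeholder model and messages).
import litellm

def get_text(messages, stream: bool, temperature: float = 0.7) -> str:
    if stream:
        # Streaming: accumulate the delta content of each chunk.
        text = ""
        for chunk in litellm.completion(
            model="ollama/llama3",
            messages=messages,
            temperature=temperature,
            stream=True,
        ):
            if chunk and chunk.choices and chunk.choices[0].delta.content:
                text += chunk.choices[0].delta.content
        return text
    # Non-streaming: one response object, read the full message content.
    resp = litellm.completion(
        model="ollama/llama3",
        messages=messages,
        temperature=temperature,
        stream=False,
    )
    return resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""

# Example: get_text([{"role": "user", "content": "Hello"}], stream=False)
```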
@@ -906,15 +917,23 @@
  console=console
  )

- # Return the final response after processing Ollama's follow-up
+ # Update messages and continue the loop instead of returning
  if final_response_text:
- return final_response_text
+ # Update messages with the response to maintain conversation context
+ messages.append({
+ "role": "assistant",
+ "content": final_response_text
+ })
+ # Continue the loop to check if more tools are needed
+ iteration_count += 1
+ continue
  else:
  logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

- # If reasoning_steps is True and we haven't handled Ollama already, do a single non-streaming call
+ # Handle reasoning_steps after tool execution if not already handled by Ollama
  if reasoning_steps and not ollama_handled:
- resp = litellm.completion(
+ # Make a non-streaming call to capture reasoning content
+ reasoning_resp = litellm.completion(
  **self._build_completion_params(
  messages=messages,
  temperature=temperature,
@@ -922,75 +941,28 @@
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
- reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
- response_text = resp["choices"][0]["message"]["content"]
+ reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+ response_text = reasoning_resp["choices"][0]["message"]["content"]

- # Optionally display reasoning if present
- if verbose and reasoning_content:
- display_interaction(
- original_prompt,
- f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
- else:
- display_interaction(
- original_prompt,
- response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
-
- # Otherwise do the existing streaming approach if not already handled
- elif not ollama_handled:
- # Get response after tool calls with streaming
- if verbose:
- with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
- final_response_text = ""
- for chunk in litellm.completion(
- **self._build_completion_params(
- messages=messages,
- tools=formatted_tools,
- temperature=temperature,
- stream=True,
- **kwargs
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- content = chunk.choices[0].delta.content
- final_response_text += content
- live.update(display_generating(final_response_text, current_time))
- else:
- final_response_text = ""
- for chunk in litellm.completion(
- **self._build_completion_params(
- messages=messages,
- tools=formatted_tools,
- temperature=temperature,
- stream=stream,
- **kwargs
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta.content:
- final_response_text += chunk.choices[0].delta.content
+ # Store reasoning content for later use
+ if reasoning_content:
+ stored_reasoning_content = reasoning_content

- final_response_text = final_response_text.strip()
-
- # Display final response
- if verbose:
- display_interaction(
- original_prompt,
- final_response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
+ # Update messages with the response
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })

- return final_response_text
+ # After tool execution, continue the loop to check if more tools are needed
+ # instead of immediately trying to get a final response
+ iteration_count += 1
+ continue
  else:
  # No tool calls, we're done with this iteration
+ # If we've executed tools in previous iterations, this response contains the final answer
+ if iteration_count > 0:
+ final_response_text = response_text.strip()
  break

  except Exception as e:
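Taken together, these hunks change the synchronous tool path from "execute tools, then force one final summary call" to a bounded loop: after tool results (or an Ollama follow-up) the assistant message is appended and the loop runs again, so the model can request further tools until it answers without any, or `max_iterations` is reached. A condensed sketch of that loop shape; `execute_tool_fn` and the model name are placeholders, and the bookkeeping is simplified compared to the real method:

```python
# Condensed sketch of the bounded tool-calling loop (names and details simplified).
import json
import litellm

def run_tool_loop(messages, tools, execute_tool_fn, model="gpt-4o-mini", max_iterations=10):
    iteration_count = 0
    final_response_text = ""
    while iteration_count < max_iterations:
        resp = litellm.completion(model=model, messages=messages, tools=tools, stream=False)
        message = resp["choices"][0]["message"]
        tool_calls = message.get("tool_calls") or []
        if not tool_calls:
            # No further tool calls: this response is the final answer.
            final_response_text = (message.get("content") or "").strip()
            break
        # Keep the assistant's tool-call turn in the conversation context.
        messages.append({
            "role": "assistant",
            "content": message.get("content") or "",
            "tool_calls": [
                {"id": tc.id, "type": "function",
                 "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
                for tc in tool_calls
            ],
        })
        # Execute each requested tool and feed the result back to the model.
        for tc in tool_calls:
            result = execute_tool_fn(tc.function.name, json.loads(tc.function.arguments or "{}"))
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": json.dumps(result)})
        iteration_count += 1  # loop again so the model can ask for more tools
    return final_response_text
```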
@@ -1003,16 +975,30 @@

  # No tool calls were made in this iteration, return the response
  if verbose:
- display_interaction(
- original_prompt,
- response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
+ # If we have stored reasoning content from tool execution, display it
+ if stored_reasoning_content:
+ display_interaction(
+ original_prompt,
+ f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{response_text}",
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )
+ else:
+ display_interaction(
+ original_prompt,
+ response_text,
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )

  response_text = response_text.strip()

+ # Return reasoning content if reasoning_steps is True and we have it
+ if reasoning_steps and stored_reasoning_content:
+ return stored_reasoning_content
+
  # Handle output formatting
  if output_json or output_pydantic:
  self.chat_history.append({"role": "user", "content": original_prompt})
@@ -1027,8 +1013,8 @@
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
  # Return reasoning content if reasoning_steps is True
- if reasoning_steps and reasoning_content:
- return reasoning_content
+ if reasoning_steps and stored_reasoning_content:
+ return stored_reasoning_content
  return response_text

  # Handle self-reflection loop
@@ -1291,118 +1277,126 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  # Format tools for LiteLLM using the shared helper
  formatted_tools = self._format_tools_for_litellm(tools)

- response_text = ""
- if reasoning_steps:
- # Non-streaming call to capture reasoning
- resp = await litellm.acompletion(
- **self._build_completion_params(
- messages=messages,
+ # Initialize variables for iteration loop
+ max_iterations = 10 # Prevent infinite loops
+ iteration_count = 0
+ final_response_text = ""
+ stored_reasoning_content = None # Store reasoning content from tool execution
+
+ while iteration_count < max_iterations:
+ response_text = ""
+ reasoning_content = None
+ tool_calls = []
+
+ if reasoning_steps and iteration_count == 0:
+ # Non-streaming call to capture reasoning
+ resp = await litellm.acompletion(
+ **self._build_completion_params(
+ messages=messages,
  temperature=temperature,
  stream=False, # force non-streaming
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
- )
- reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
- response_text = resp["choices"][0]["message"]["content"]
-
- if verbose and reasoning_content:
- display_interaction(
- "Initial reasoning:",
- f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
- )
- elif verbose:
- display_interaction(
- "Initial response:",
- response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
  )
- else:
- # Determine if we should use streaming based on tool support
- use_streaming = stream
- if formatted_tools and not self._supports_streaming_tools():
- # Provider doesn't support streaming with tools, use non-streaming
- use_streaming = False
-
- if use_streaming:
- # Streaming approach (with or without tools)
- tool_calls = []
+ reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+ response_text = resp["choices"][0]["message"]["content"]

- if verbose:
- async for chunk in await litellm.acompletion(
- **self._build_completion_params(
- messages=messages,
- temperature=temperature,
- stream=True,
- tools=formatted_tools,
- **kwargs
- )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta:
- delta = chunk.choices[0].delta
- response_text, tool_calls = self._process_stream_delta(
- delta, response_text, tool_calls, formatted_tools
+ if verbose and reasoning_content:
+ display_interaction(
+ "Initial reasoning:",
+ f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )
+ elif verbose:
+ display_interaction(
+ "Initial response:",
+ response_text,
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )
+ else:
+ # Determine if we should use streaming based on tool support
+ use_streaming = stream
+ if formatted_tools and not self._supports_streaming_tools():
+ # Provider doesn't support streaming with tools, use non-streaming
+ use_streaming = False
+
+ if use_streaming:
+ # Streaming approach (with or without tools)
+ tool_calls = []
+
+ if verbose:
+ async for chunk in await litellm.acompletion(
+ **self._build_completion_params(
+ messages=messages,
+ temperature=temperature,
+ stream=True,
+ tools=formatted_tools,
+ **kwargs
  )
- if delta.content:
- print("\033[K", end="\r")
- print(f"Generating... {time.time() - start_time:.1f}s", end="\r")
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+ response_text, tool_calls = self._process_stream_delta(
+ delta, response_text, tool_calls, formatted_tools
+ )
+ if delta.content:
+ print("\033[K", end="\r")
+ print(f"Generating... {time.time() - start_time:.1f}s", end="\r")

+ else:
+ # Non-verbose streaming
+ async for chunk in await litellm.acompletion(
+ **self._build_completion_params(
+ messages=messages,
+ temperature=temperature,
+ stream=True,
+ tools=formatted_tools,
+ **kwargs
+ )
+ ):
+ if chunk and chunk.choices and chunk.choices[0].delta:
+ delta = chunk.choices[0].delta
+ if delta.content:
+ response_text += delta.content
+
+ # Capture tool calls from streaming chunks if provider supports it
+ if formatted_tools and self._supports_streaming_tools():
+ tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
+
+ response_text = response_text.strip()
+
+ # We already have tool_calls from streaming if supported
+ # No need for a second API call!
  else:
- # Non-verbose streaming
- async for chunk in await litellm.acompletion(
+ # Non-streaming approach (when tools require it or streaming is disabled)
+ tool_response = await litellm.acompletion(
  **self._build_completion_params(
  messages=messages,
  temperature=temperature,
- stream=True,
+ stream=False,
  tools=formatted_tools,
- **kwargs
+ **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
- ):
- if chunk and chunk.choices and chunk.choices[0].delta:
- delta = chunk.choices[0].delta
- if delta.content:
- response_text += delta.content
-
- # Capture tool calls from streaming chunks if provider supports it
- if formatted_tools and self._supports_streaming_tools():
- tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
-
- response_text = response_text.strip()
-
- # We already have tool_calls from streaming if supported
- # No need for a second API call!
- else:
- # Non-streaming approach (when tools require it or streaming is disabled)
- tool_response = await litellm.acompletion(
- **self._build_completion_params(
- messages=messages,
- temperature=temperature,
- stream=False,
- tools=formatted_tools,
- **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
- )
- )
- response_text = tool_response.choices[0].message.get("content", "")
- tool_calls = tool_response.choices[0].message.get("tool_calls", [])
-
- if verbose:
- # Display the complete response at once
- display_interaction(
- original_prompt,
- response_text,
- markdown=markdown,
- generation_time=time.time() - start_time,
- console=console
  )
+ response_text = tool_response.choices[0].message.get("content", "")
+ tool_calls = tool_response.choices[0].message.get("tool_calls", [])
+
+ if verbose:
+ # Display the complete response at once
+ display_interaction(
+ original_prompt,
+ response_text,
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )

- # Now handle tools if we have them (either from streaming or non-streaming)
- if tools and execute_tool_fn and tool_calls:
-
- if tool_calls:
+ # Now handle tools if we have them (either from streaming or non-streaming)
+ if tools and execute_tool_fn and tool_calls:
  # Convert tool_calls to a serializable format for all providers
  serializable_tool_calls = self._serialize_tool_calls(tool_calls)
  messages.append({
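The async path now mirrors the synchronous one and, when the provider supports it, collects tool calls directly from the streaming deltas, so the earlier second non-streaming request is no longer needed. A rough sketch of that accumulation, written from scratch to illustrate what a helper like `_process_tool_calls_from_stream` has to do; it assumes OpenAI-style streamed tool-call fragments and is not the library's actual implementation:

```python
# Rough sketch: gather text and tool-call fragments from an async token stream.
import litellm

async def stream_with_tools(messages, tools, model="gpt-4o-mini"):
    response_text, tool_calls = "", []
    async for chunk in await litellm.acompletion(
        model=model, messages=messages, tools=tools, stream=True,
    ):
        if not (chunk and chunk.choices and chunk.choices[0].delta):
            continue
        delta = chunk.choices[0].delta
        if delta.content:
            response_text += delta.content
        # Tool-call arguments arrive as fragments keyed by index; stitch them together.
        for frag in (getattr(delta, "tool_calls", None) or []):
            while len(tool_calls) <= frag.index:
                tool_calls.append({"id": "", "function": {"name": "", "arguments": ""}})
            if frag.id:
                tool_calls[frag.index]["id"] = frag.id
            if frag.function and frag.function.name:
                tool_calls[frag.index]["function"]["name"] = frag.function.name
            if frag.function and frag.function.arguments:
                tool_calls[frag.index]["function"]["arguments"] += frag.function.arguments
    return response_text.strip(), tool_calls

# Example (inside an event loop), with placeholder tools:
#   text, calls = await stream_with_tools([{"role": "user", "content": "What is 2+2?"}], tools=my_tools)
```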
@@ -1483,9 +1477,16 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  console=console
  )

- # Return the final response after processing Ollama's follow-up
+ # Store the response for potential final return
  if final_response_text:
- return final_response_text
+ # Update messages with the response to maintain conversation context
+ messages.append({
+ "role": "assistant",
+ "content": final_response_text
+ })
+ # Continue the loop to check if more tools are needed
+ iteration_count += 1
+ continue
  else:
  logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

@@ -1551,6 +1552,27 @@
  response_text += chunk.choices[0].delta.content

  response_text = response_text.strip()
+
+ # After tool execution, update messages and continue the loop
+ if response_text:
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+
+ # Store reasoning content if captured
+ if reasoning_steps and reasoning_content:
+ stored_reasoning_content = reasoning_content
+
+ # Continue the loop to check if more tools are needed
+ iteration_count += 1
+ continue
+ else:
+ # No tool calls, we're done with this iteration
+ # If we've executed tools in previous iterations, this response contains the final answer
+ if iteration_count > 0:
+ final_response_text = response_text.strip()
+ break

  # Handle output formatting
  if output_json or output_pydantic:
@@ -1562,13 +1584,27 @@
  return response_text

  if not self_reflect:
+ # Use final_response_text if we went through tool iterations
+ display_text = final_response_text if final_response_text else response_text
+
+ # Display with stored reasoning content if available
  if verbose:
- display_interaction(original_prompt, response_text, markdown=markdown,
- generation_time=time.time() - start_time, console=console)
- # Return reasoning content if reasoning_steps is True
- if reasoning_steps and reasoning_content:
- return reasoning_content
- return response_text
+ if stored_reasoning_content:
+ display_interaction(
+ original_prompt,
+ f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{display_text}",
+ markdown=markdown,
+ generation_time=time.time() - start_time,
+ console=console
+ )
+ else:
+ display_interaction(original_prompt, display_text, markdown=markdown,
+ generation_time=time.time() - start_time, console=console)
+
+ # Return reasoning content if reasoning_steps is True and we have it
+ if reasoning_steps and stored_reasoning_content:
+ return stored_reasoning_content
+ return display_text

  # Handle self-reflection
  reflection_prompt = f"""