praisonaiagents 0.0.124.tar.gz → 0.0.126.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/PKG-INFO +2 -1
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/__init__.py +24 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/agent.py +2 -1
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/knowledge.py +0 -3
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/__init__.py +6 -9
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/llm.py +232 -196
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/main.py +1 -18
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/mcp.py +46 -8
- praisonaiagents-0.0.126/praisonaiagents/mcp/mcp_http_stream.py +466 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/mcp_sse.py +19 -2
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/process/process.py +88 -3
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/task/task.py +1 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/PKG-INFO +2 -1
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/SOURCES.txt +5 -1
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/requires.txt +1 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/pyproject.toml +3 -2
- praisonaiagents-0.0.126/tests/test_fix_comprehensive.py +75 -0
- praisonaiagents-0.0.126/tests/test_http_stream_basic.py +58 -0
- praisonaiagents-0.0.126/tests/test_validation_feedback.py +252 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/README.md +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/handoff.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agent/image_agent.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/agents.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/agents/autoagents.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/approval.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/guardrail_result.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/guardrails/llm_guardrail.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/knowledge/chunking.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/model_capabilities.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/llm/openai_client.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/mcp/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/memory/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/memory/memory.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/process/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/session.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/task/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/integration.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/telemetry/telemetry.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/README.md +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/__init__.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/arxiv_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/calculator_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/csv_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/duckdb_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/duckduckgo_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/excel_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/file_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/json_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/newspaper_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/pandas_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/python_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/searxng_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/shell_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/spider_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/test.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/train/data/generatecot.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/wikipedia_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/xml_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/yaml_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents/tools/yfinance_tools.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/dependency_links.txt +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/praisonaiagents.egg-info/top_level.txt +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/setup.cfg +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test-graph-memory.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_handoff_compatibility.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_ollama_async_fix.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_ollama_fix.py +0 -0
- {praisonaiagents-0.0.124 → praisonaiagents-0.0.126}/tests/test_posthog_fixed.py +0 -0
```diff
--- praisonaiagents-0.0.124/PKG-INFO
+++ praisonaiagents-0.0.126/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: praisonaiagents
-Version: 0.0.124
+Version: 0.0.126
 Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
 Author: Mervin Praison
 Requires-Python: >=3.10
@@ -9,6 +9,7 @@ Requires-Dist: rich
 Requires-Dist: openai
 Requires-Dist: mcp>=1.6.0
 Requires-Dist: posthog>=3.0.0
+Requires-Dist: aiohttp>=3.8.0
 Provides-Extra: mcp
 Requires-Dist: mcp>=1.6.0; extra == "mcp"
 Requires-Dist: fastapi>=0.115.0; extra == "mcp"
```
```diff
--- praisonaiagents-0.0.124/praisonaiagents/__init__.py
+++ praisonaiagents-0.0.126/praisonaiagents/__init__.py
@@ -2,6 +2,30 @@
 Praison AI Agents - A package for hierarchical AI agent task execution
 """
 
+# Configure logging before any other imports
+import os
+import logging
+from rich.logging import RichHandler
+
+# Get log level from environment variable
+LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
+
+# Configure root logger
+logging.basicConfig(
+    level=getattr(logging, LOGLEVEL, logging.INFO),
+    format="%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(rich_tracebacks=True)]
+)
+
+# Suppress specific noisy loggers
+logging.getLogger("litellm").setLevel(logging.WARNING)
+logging.getLogger("litellm.utils").setLevel(logging.WARNING)
+logging.getLogger("markdown_it").setLevel(logging.WARNING)
+logging.getLogger("rich.markdown").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+
 from .agent.agent import Agent
 from .agent.image_agent import ImageAgent
 from .agents.agents import PraisonAIAgents
```
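The hunk above configures the root logger from a `LOGLEVEL` environment variable (default `INFO`) at import time, so the variable has to be set before the package is imported. A minimal usage sketch, assuming the package is installed:

```python
# Set the level before importing praisonaiagents; the package reads LOGLEVEL
# (default "INFO") and installs a RichHandler on the root logger at import time.
import os
os.environ["LOGLEVEL"] = "DEBUG"  # any standard logging level name

import praisonaiagents  # noqa: F401  (the import triggers the logging setup shown above)
```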
```diff
--- praisonaiagents-0.0.124/praisonaiagents/agent/agent.py
+++ praisonaiagents-0.0.126/praisonaiagents/agent/agent.py
@@ -1235,7 +1235,8 @@ Your Goal: {self.goal}"""
                 agent_role=self.role,
                 agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
                 execute_tool_fn=self.execute_tool, # Pass tool execution function
-                reasoning_steps=reasoning_steps
+                reasoning_steps=reasoning_steps,
+                stream=stream # Pass the stream parameter from chat method
             )
 
             self.chat_history.append({"role": "assistant", "content": response_text})
```
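This hunk threads the caller's `stream` flag from the chat path into the underlying LLM call. A hypothetical caller-side sketch; the exact `Agent`/`chat()` signature is assumed here, beyond what the added "Pass the stream parameter from chat method" comment implies:

```python
# Hypothetical usage sketch (parameter names assumed, not confirmed by the diff).
from praisonaiagents import Agent

agent = Agent(instructions="You are a helpful assistant")
reply = agent.chat("Give me a one-line status update", stream=False)  # flag is forwarded to the LLM layer
print(reply)
```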
```diff
--- praisonaiagents-0.0.124/praisonaiagents/knowledge/knowledge.py
+++ praisonaiagents-0.0.126/praisonaiagents/knowledge/knowledge.py
@@ -67,9 +67,6 @@ class Knowledge:
 
         # Disable OpenAI API request logging
         logging.getLogger('openai').setLevel(logging.WARNING)
-
-        # Set root logger to warning to catch any uncategorized logs
-        logging.getLogger().setLevel(logging.WARNING)
 
     @cached_property
     def _deps(self):
```
```diff
--- praisonaiagents-0.0.124/praisonaiagents/llm/__init__.py
+++ praisonaiagents-0.0.126/praisonaiagents/llm/__init__.py
@@ -5,19 +5,16 @@ import os
 # Disable litellm telemetry before any imports
 os.environ["LITELLM_TELEMETRY"] = "False"
 
-# Suppress all relevant logs at module level
-logging.getLogger("litellm").setLevel(logging.
-logging.getLogger("openai").setLevel(logging.
-logging.getLogger("httpx").setLevel(logging.
-logging.getLogger("httpcore").setLevel(logging.
-logging.getLogger("pydantic").setLevel(logging.
+# Suppress all relevant logs at module level - consistent with main __init__.py
+logging.getLogger("litellm").setLevel(logging.WARNING)
+logging.getLogger("openai").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+logging.getLogger("pydantic").setLevel(logging.WARNING)
 
 # Suppress pydantic warnings
 warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
 
-# Configure logging to suppress all INFO messages
-logging.basicConfig(level=logging.WARNING)
-
 # Import after suppressing warnings
 from .llm import LLM, LLMContextLengthExceededException
 from .openai_client import (
```
(The removed lines above are shown truncated by the diff viewer; the original log-level argument was elided.)
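These suppressions run at module import time; a caller who wants the third-party chatter back can simply raise the levels again after importing. A small sketch using only the standard logging API:

```python
# Sketch: opt back in to verbose third-party logs after the package import
# has applied the WARNING-level suppression shown in the hunk above.
import logging
import praisonaiagents  # noqa: F401

for name in ("litellm", "openai", "httpx", "httpcore", "pydantic"):
    logging.getLogger(name).setLevel(logging.DEBUG)  # undo the per-logger suppression
```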
```diff
--- praisonaiagents-0.0.124/praisonaiagents/llm/llm.py
+++ praisonaiagents-0.0.126/praisonaiagents/llm/llm.py
@@ -680,6 +680,7 @@ class LLM:
         max_iterations = 10  # Prevent infinite loops
         iteration_count = 0
         final_response_text = ""
+        stored_reasoning_content = None  # Store reasoning content from tool execution
 
         while iteration_count < max_iterations:
            try:
```
```diff
@@ -857,39 +858,49 @@ class LLM:
                     iteration_count += 1
                     continue
 
-                    # If we reach here, no more tool calls needed - get final response
-                    # Make one more call to get the final summary response
                     # Special handling for Ollama models that don't automatically process tool results
                     ollama_handled = False
                     ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
 
                     if ollama_params:
-                        # Get response
-                        if
-
+                        # Get response based on streaming mode
+                        if stream:
+                            # Streaming approach
+                            if verbose:
+                                with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                                    response_text = ""
+                                    for chunk in litellm.completion(
+                                        **self._build_completion_params(
+                                            messages=ollama_params["follow_up_messages"],
+                                            temperature=temperature,
+                                            stream=True
+                                        )
+                                    ):
+                                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                            content = chunk.choices[0].delta.content
+                                            response_text += content
+                                            live.update(display_generating(response_text, start_time))
+                            else:
                                 response_text = ""
                                 for chunk in litellm.completion(
                                     **self._build_completion_params(
                                         messages=ollama_params["follow_up_messages"],
                                         temperature=temperature,
-                                        stream=
+                                        stream=True
                                     )
                                 ):
                                     if chunk and chunk.choices and chunk.choices[0].delta.content:
-
-                                        response_text += content
-                                        live.update(display_generating(response_text, start_time))
+                                        response_text += chunk.choices[0].delta.content
                         else:
-
-
+                            # Non-streaming approach
+                            resp = litellm.completion(
                                 **self._build_completion_params(
                                     messages=ollama_params["follow_up_messages"],
                                     temperature=temperature,
-                                    stream=
+                                    stream=False
                                 )
-                        )
-
-                        response_text += chunk.choices[0].delta.content
+                            )
+                            response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
 
                         # Set flag to indicate Ollama was handled
                         ollama_handled = True
```
```diff
@@ -906,15 +917,23 @@ class LLM:
                                 console=console
                             )
 
-                        #
+                        # Update messages and continue the loop instead of returning
                         if final_response_text:
-
+                            # Update messages with the response to maintain conversation context
+                            messages.append({
+                                "role": "assistant",
+                                "content": final_response_text
+                            })
+                            # Continue the loop to check if more tools are needed
+                            iteration_count += 1
+                            continue
                         else:
                             logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
 
-                    #
+                    # Handle reasoning_steps after tool execution if not already handled by Ollama
                     if reasoning_steps and not ollama_handled:
-
+                        # Make a non-streaming call to capture reasoning content
+                        reasoning_resp = litellm.completion(
                             **self._build_completion_params(
                                 messages=messages,
                                 temperature=temperature,
```
```diff
@@ -922,75 +941,28 @@ class LLM:
                                 **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                             )
                         )
-                        reasoning_content =
-                        response_text =
+                        reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                        response_text = reasoning_resp["choices"][0]["message"]["content"]
 
-                        #
-                        if
-
-                                original_prompt,
-                                f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                                markdown=markdown,
-                                generation_time=time.time() - start_time,
-                                console=console
-                            )
-                        else:
-                            display_interaction(
-                                original_prompt,
-                                response_text,
-                                markdown=markdown,
-                                generation_time=time.time() - start_time,
-                                console=console
-                            )
-
-                    # Otherwise do the existing streaming approach if not already handled
-                    elif not ollama_handled:
-                        # Get response after tool calls with streaming
-                        if verbose:
-                            with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
-                                final_response_text = ""
-                                for chunk in litellm.completion(
-                                    **self._build_completion_params(
-                                        messages=messages,
-                                        tools=formatted_tools,
-                                        temperature=temperature,
-                                        stream=True,
-                                        **kwargs
-                                    )
-                                ):
-                                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                        content = chunk.choices[0].delta.content
-                                        final_response_text += content
-                                        live.update(display_generating(final_response_text, current_time))
-                        else:
-                            final_response_text = ""
-                            for chunk in litellm.completion(
-                                **self._build_completion_params(
-                                    messages=messages,
-                                    tools=formatted_tools,
-                                    temperature=temperature,
-                                    stream=stream,
-                                    **kwargs
-                                )
-                            ):
-                                if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                    final_response_text += chunk.choices[0].delta.content
+                        # Store reasoning content for later use
+                        if reasoning_content:
+                            stored_reasoning_content = reasoning_content
 
-
-
-
-
-
-                            original_prompt,
-                            final_response_text,
-                            markdown=markdown,
-                            generation_time=time.time() - start_time,
-                            console=console
-                        )
+                    # Update messages with the response
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text
+                    })
 
-
+                    # After tool execution, continue the loop to check if more tools are needed
+                    # instead of immediately trying to get a final response
+                    iteration_count += 1
+                    continue
                 else:
                     # No tool calls, we're done with this iteration
+                    # If we've executed tools in previous iterations, this response contains the final answer
+                    if iteration_count > 0:
+                        final_response_text = response_text.strip()
                     break
 
             except Exception as e:
```
```diff
@@ -1003,16 +975,30 @@ class LLM:
 
                 # No tool calls were made in this iteration, return the response
                 if verbose:
-
-
-
-
-
-
-
+                    # If we have stored reasoning content from tool execution, display it
+                    if stored_reasoning_content:
+                        display_interaction(
+                            original_prompt,
+                            f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{response_text}",
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
+                    else:
+                        display_interaction(
+                            original_prompt,
+                            response_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
 
                 response_text = response_text.strip()
 
+                # Return reasoning content if reasoning_steps is True and we have it
+                if reasoning_steps and stored_reasoning_content:
+                    return stored_reasoning_content
+
                 # Handle output formatting
                 if output_json or output_pydantic:
                     self.chat_history.append({"role": "user", "content": original_prompt})
```
```diff
@@ -1027,8 +1013,8 @@ class LLM:
                     display_interaction(original_prompt, response_text, markdown=markdown,
                                       generation_time=time.time() - start_time, console=console)
                 # Return reasoning content if reasoning_steps is True
-                if reasoning_steps and
-                    return
+                if reasoning_steps and stored_reasoning_content:
+                    return stored_reasoning_content
                 return response_text
 
             # Handle self-reflection loop
```
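The sync-path hunks above replace the old "one follow-up call after tools" flow with an explicit iteration loop: execute the requested tools, append the assistant turn, `continue`, and only `break` once a response arrives with no tool calls. A minimal standalone sketch of that pattern (illustrative names, not the library's actual method):

```python
# Minimal sketch of the iterate-until-no-tool-calls pattern shown above.
# call_model and execute_tool are illustrative stand-ins, not praisonaiagents APIs.
def run_tool_loop(call_model, execute_tool, messages, max_iterations=10):
    final_response_text = ""
    for _ in range(max_iterations):  # prevent infinite loops
        reply = call_model(messages)  # expected to expose .text and .tool_calls
        if not reply.tool_calls:
            # No tool calls: this response is the final answer
            final_response_text = reply.text.strip()
            break
        messages.append({"role": "assistant", "content": reply.text})
        for call in reply.tool_calls:  # run each requested tool and feed the result back
            result = execute_tool(call.name, call.arguments)
            messages.append({"role": "tool", "tool_call_id": call.id, "content": str(result)})
        # Loop again so the model can request more tools or produce the final answer
    return final_response_text
```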
```diff
--- praisonaiagents-0.0.124/praisonaiagents/llm/llm.py
+++ praisonaiagents-0.0.126/praisonaiagents/llm/llm.py
@@ -1291,118 +1277,126 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         # Format tools for LiteLLM using the shared helper
         formatted_tools = self._format_tools_for_litellm(tools)
 
-
-
-
-
-
-
+        # Initialize variables for iteration loop
+        max_iterations = 10  # Prevent infinite loops
+        iteration_count = 0
+        final_response_text = ""
+        stored_reasoning_content = None  # Store reasoning content from tool execution
+
+        while iteration_count < max_iterations:
+            response_text = ""
+            reasoning_content = None
+            tool_calls = []
+
+            if reasoning_steps and iteration_count == 0:
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
+                    **self._build_completion_params(
+                        messages=messages,
                         temperature=temperature,
                         stream=False,  # force non-streaming
                         **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                     )
-                )
-                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                response_text = resp["choices"][0]["message"]["content"]
-
-                if verbose and reasoning_content:
-                    display_interaction(
-                        "Initial reasoning:",
-                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
-                    )
-                elif verbose:
-                    display_interaction(
-                        "Initial response:",
-                        response_text,
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
                 )
-
-
-        use_streaming = stream
-        if formatted_tools and not self._supports_streaming_tools():
-            # Provider doesn't support streaming with tools, use non-streaming
-            use_streaming = False
-
-        if use_streaming:
-            # Streaming approach (with or without tools)
-            tool_calls = []
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
 
-            if verbose:
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Initial reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Initial response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+            else:
+                # Determine if we should use streaming based on tool support
+                use_streaming = stream
+                if formatted_tools and not self._supports_streaming_tools():
+                    # Provider doesn't support streaming with tools, use non-streaming
+                    use_streaming = False
+
+                if use_streaming:
+                    # Streaming approach (with or without tools)
+                    tool_calls = []
+
+                    if verbose:
+                        async for chunk in await litellm.acompletion(
+                            **self._build_completion_params(
+                                messages=messages,
+                                temperature=temperature,
+                                stream=True,
+                                tools=formatted_tools,
+                                **kwargs
                             )
-
-
-
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta:
+                                delta = chunk.choices[0].delta
+                                response_text, tool_calls = self._process_stream_delta(
+                                    delta, response_text, tool_calls, formatted_tools
+                                )
+                                if delta.content:
+                                    print("\033[K", end="\r")
+                                    print(f"Generating... {time.time() - start_time:.1f}s", end="\r")
 
+                    else:
+                        # Non-verbose streaming
+                        async for chunk in await litellm.acompletion(
+                            **self._build_completion_params(
+                                messages=messages,
+                                temperature=temperature,
+                                stream=True,
+                                tools=formatted_tools,
+                                **kwargs
+                            )
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta:
+                                delta = chunk.choices[0].delta
+                                if delta.content:
+                                    response_text += delta.content
+
+                                # Capture tool calls from streaming chunks if provider supports it
+                                if formatted_tools and self._supports_streaming_tools():
+                                    tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
+
+                    response_text = response_text.strip()
+
+                    # We already have tool_calls from streaming if supported
+                    # No need for a second API call!
                 else:
-                    # Non-
-
+                    # Non-streaming approach (when tools require it or streaming is disabled)
+                    tool_response = await litellm.acompletion(
                         **self._build_completion_params(
                             messages=messages,
                             temperature=temperature,
-                            stream=
+                            stream=False,
                             tools=formatted_tools,
-                            **kwargs
+                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta:
-                            delta = chunk.choices[0].delta
-                            if delta.content:
-                                response_text += delta.content
-
-                            # Capture tool calls from streaming chunks if provider supports it
-                            if formatted_tools and self._supports_streaming_tools():
-                                tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
-
-                    response_text = response_text.strip()
-
-                    # We already have tool_calls from streaming if supported
-                    # No need for a second API call!
-                else:
-                    # Non-streaming approach (when tools require it or streaming is disabled)
-                    tool_response = await litellm.acompletion(
-                        **self._build_completion_params(
-                            messages=messages,
-                            temperature=temperature,
-                            stream=False,
-                            tools=formatted_tools,
-                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                        )
-                    )
-                    response_text = tool_response.choices[0].message.get("content", "")
-                    tool_calls = tool_response.choices[0].message.get("tool_calls", [])
-
-                    if verbose:
-                        # Display the complete response at once
-                        display_interaction(
-                            original_prompt,
-                            response_text,
-                            markdown=markdown,
-                            generation_time=time.time() - start_time,
-                            console=console
                     )
+                    response_text = tool_response.choices[0].message.get("content", "")
+                    tool_calls = tool_response.choices[0].message.get("tool_calls", [])
+
+                    if verbose:
+                        # Display the complete response at once
+                        display_interaction(
+                            original_prompt,
+                            response_text,
+                            markdown=markdown,
+                            generation_time=time.time() - start_time,
+                            console=console
+                        )
 
-
-
-
-        if tool_calls:
+            # Now handle tools if we have them (either from streaming or non-streaming)
+            if tools and execute_tool_fn and tool_calls:
                 # Convert tool_calls to a serializable format for all providers
                 serializable_tool_calls = self._serialize_tool_calls(tool_calls)
                 messages.append({
```
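The async hunk keeps the same guard as the sync path: streaming is only used when the provider can stream tool calls; otherwise it falls back to a single non-streaming request and reads `tool_calls` off the returned message. A small sketch of that decision (the flag corresponds to the `_supports_streaming_tools()` check in the diff):

```python
# Sketch of the streaming decision mirrored from the diff above.
def should_stream(stream_requested: bool, has_tools: bool,
                  supports_streaming_tools: bool) -> bool:
    if has_tools and not supports_streaming_tools:
        return False  # provider can't stream tool calls, use one non-streaming call
    return stream_requested
```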
```diff
--- praisonaiagents-0.0.124/praisonaiagents/llm/llm.py
+++ praisonaiagents-0.0.126/praisonaiagents/llm/llm.py
@@ -1483,9 +1477,16 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             console=console
                         )
 
-                    #
+                    # Store the response for potential final return
                     if final_response_text:
-
+                        # Update messages with the response to maintain conversation context
+                        messages.append({
+                            "role": "assistant",
+                            "content": final_response_text
+                        })
+                        # Continue the loop to check if more tools are needed
+                        iteration_count += 1
+                        continue
                     else:
                         logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
 
```
```diff
@@ -1551,6 +1552,27 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                 response_text += chunk.choices[0].delta.content
 
                 response_text = response_text.strip()
+
+                # After tool execution, update messages and continue the loop
+                if response_text:
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text
+                    })
+
+                    # Store reasoning content if captured
+                    if reasoning_steps and reasoning_content:
+                        stored_reasoning_content = reasoning_content
+
+                    # Continue the loop to check if more tools are needed
+                    iteration_count += 1
+                    continue
+            else:
+                # No tool calls, we're done with this iteration
+                # If we've executed tools in previous iterations, this response contains the final answer
+                if iteration_count > 0:
+                    final_response_text = response_text.strip()
+                break
 
         # Handle output formatting
         if output_json or output_pydantic:
```
```diff
@@ -1562,13 +1584,27 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 return response_text
 
         if not self_reflect:
+            # Use final_response_text if we went through tool iterations
+            display_text = final_response_text if final_response_text else response_text
+
+            # Display with stored reasoning content if available
             if verbose:
-
-
-
-
-
-
+                if stored_reasoning_content:
+                    display_interaction(
+                        original_prompt,
+                        f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{display_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                else:
+                    display_interaction(original_prompt, display_text, markdown=markdown,
+                                      generation_time=time.time() - start_time, console=console)
+
+            # Return reasoning content if reasoning_steps is True and we have it
+            if reasoning_steps and stored_reasoning_content:
+                return stored_reasoning_content
+            return display_text
 
         # Handle self-reflection
         reflection_prompt = f"""
```