praisonaiagents 0.0.128__py3-none-any.whl → 0.0.130__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- praisonaiagents/agent/__init__.py +2 -1
- praisonaiagents/agent/router_agent.py +334 -0
- praisonaiagents/llm/__init__.py +11 -1
- praisonaiagents/llm/llm.py +240 -57
- praisonaiagents/llm/model_capabilities.py +20 -3
- praisonaiagents/llm/model_router.py +348 -0
- praisonaiagents/process/process.py +76 -63
- praisonaiagents/task/task.py +17 -4
- praisonaiagents/tools/README.md +13 -0
- praisonaiagents/tools/__init__.py +1 -0
- {praisonaiagents-0.0.128.dist-info → praisonaiagents-0.0.130.dist-info}/METADATA +1 -1
- {praisonaiagents-0.0.128.dist-info → praisonaiagents-0.0.130.dist-info}/RECORD +14 -12
- {praisonaiagents-0.0.128.dist-info → praisonaiagents-0.0.130.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.128.dist-info → praisonaiagents-0.0.130.dist-info}/top_level.txt +0 -0
praisonaiagents/llm/llm.py
CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 import warnings
+import re
 from typing import Any, Dict, List, Optional, Union, Literal, Callable
 from pydantic import BaseModel
 import time
@@ -87,6 +88,10 @@ class LLM:
         "llama-3.2-90b-text-preview": 6144 # 8,192 actual
     }
 
+    # Ollama-specific prompt constants
+    OLLAMA_TOOL_USAGE_PROMPT = "Please analyze the request and use the available tools to help answer the question. Start by identifying what information you need."
+    OLLAMA_FINAL_ANSWER_PROMPT = "Based on the tool results above, please provide the final answer to the original question."
+
     def _log_llm_config(self, method_name: str, **config):
         """Centralized debug logging for LLM configuration and parameters.
 
@@ -277,15 +282,32 @@ class LLM:
         # Direct ollama/ prefix
         if self.model.startswith("ollama/"):
             return True
+
+        # Check base_url if provided
+        if self.base_url and "ollama" in self.base_url.lower():
+            return True
 
         # Check environment variables for Ollama base URL
         base_url = os.getenv("OPENAI_BASE_URL", "")
         api_base = os.getenv("OPENAI_API_BASE", "")
 
-        # Common Ollama endpoints
-
+        # Common Ollama endpoints (including custom ports)
+        if any(url and ("ollama" in url.lower() or ":11434" in url)
+               for url in [base_url, api_base, self.base_url or ""]):
+            return True
 
-        return
+        return False
+
+    def _format_ollama_tool_result_message(self, function_name: str, tool_result: Any) -> Dict[str, str]:
+        """
+        Format tool result message for Ollama provider.
+        Simplified approach without hardcoded regex extraction.
+        """
+        tool_result_str = str(tool_result)
+        return {
+            "role": "user",
+            "content": f"The {function_name} function returned: {tool_result_str}"
+        }
 
     def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
         """
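For reference, the detection heuristic above reduces to a small standalone check. The sketch below mirrors the new logic outside the class; the function name and example URL are illustrative, not part of the package:

```python
import os

def looks_like_ollama(model: str, base_url: str = "") -> bool:
    """Standalone sketch of the provider check added above (illustrative only)."""
    if model.startswith("ollama/"):
        return True
    if base_url and "ollama" in base_url.lower():
        return True
    # Fall back to the OpenAI-compatible endpoint env vars, including custom ports
    candidates = [os.getenv("OPENAI_BASE_URL", ""), os.getenv("OPENAI_API_BASE", ""), base_url]
    return any(url and ("ollama" in url.lower() or ":11434" in url) for url in candidates)

# e.g. looks_like_ollama("gpt-4o-mini", "http://localhost:11434/v1") returns True under this heuristic
```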
@@ -422,13 +444,22 @@ class LLM:
         """
         messages = []
 
+        # Check if this is a Gemini model that supports native structured outputs
+        is_gemini_with_structured_output = False
+        if output_json or output_pydantic:
+            from .model_capabilities import supports_structured_outputs
+            is_gemini_with_structured_output = (
+                self._is_gemini_model() and
+                supports_structured_outputs(self.model)
+            )
+
         # Handle system prompt
         if system_prompt:
-            #
-            if output_json:
-
-
-
+            # Only append JSON schema for non-Gemini models or Gemini models without structured output support
+            if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+                schema_model = output_json or output_pydantic
+                if schema_model and hasattr(schema_model, 'model_json_schema'):
+                    system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(schema_model.model_json_schema())}"
 
         # Skip system messages for legacy o1 models as they don't support them
         if not self._needs_system_message_skip():
@@ -440,7 +471,8 @@ class LLM:
 
         # Handle prompt modifications for JSON output
         original_prompt = prompt
-        if output_json or output_pydantic:
+        if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+            # Only modify prompt for non-Gemini models
            if isinstance(prompt, str):
                prompt = prompt + "\nReturn ONLY a valid JSON object. No other text or explanation."
            elif isinstance(prompt, list):
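The net effect of the two hunks above: when a schema is requested and the model is not a Gemini model with native structured-output support, the schema is still injected into the prompt text. A minimal sketch of that fallback path, using a made-up `Answer` model:

```python
import json
from pydantic import BaseModel

class Answer(BaseModel):  # hypothetical schema used only for illustration
    title: str
    score: float

system_prompt = "You are a helpful assistant."
# Fallback for models without native structured outputs: append the JSON schema instruction
system_prompt += (
    "\nReturn ONLY a JSON object that matches this Pydantic model: "
    + json.dumps(Answer.model_json_schema())
)
prompt = "Rate this article." + "\nReturn ONLY a valid JSON object. No other text or explanation."
```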
@@ -660,6 +692,7 @@ class LLM:
 
         start_time = time.time()
         reflection_count = 0
+        interaction_displayed = False # Track if interaction has been displayed
 
         # Display initial instruction once
         if verbose:
@@ -695,6 +728,8 @@ class LLM:
                             temperature=temperature,
                             stream=False, # force non-streaming
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
@@ -703,7 +738,7 @@ class LLM:
                     final_response = resp
 
                     # Optionally display reasoning if present
-                    if verbose and reasoning_content:
+                    if verbose and reasoning_content and not interaction_displayed:
                         display_interaction(
                             original_prompt,
                             f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -711,7 +746,8 @@ class LLM:
                             generation_time=time.time() - current_time,
                             console=console
                         )
-
+                        interaction_displayed = True
+                    elif verbose and not interaction_displayed:
                         display_interaction(
                             original_prompt,
                             response_text,
@@ -719,6 +755,7 @@ class LLM:
                             generation_time=time.time() - current_time,
                             console=console
                         )
+                        interaction_displayed = True
 
                 # Otherwise do the existing streaming approach
                 else:
@@ -741,6 +778,8 @@ class LLM:
                             tools=formatted_tools,
                             temperature=temperature,
                             stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -760,6 +799,8 @@ class LLM:
                             tools=formatted_tools,
                             temperature=temperature,
                             stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -772,7 +813,7 @@ class LLM:
                         if formatted_tools and self._supports_streaming_tools():
                             tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
 
                     # Create a mock final_response with the captured data
                     final_response = {
@@ -791,12 +832,14 @@ class LLM:
                             tools=formatted_tools,
                             temperature=temperature,
                             stream=False,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     )
                     response_text = final_response["choices"][0]["message"]["content"]
 
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         # Display the complete response at once
                         display_interaction(
                             original_prompt,
@@ -805,18 +848,37 @@ class LLM:
                             generation_time=time.time() - current_time,
                             console=console
                         )
+                        interaction_displayed = True
 
                 tool_calls = final_response["choices"][0]["message"].get("tool_calls")
 
+                # For Ollama, if response is empty but we have tools, prompt for tool usage
+                if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+                    messages.append({
+                        "role": "user",
+                        "content": self.OLLAMA_TOOL_USAGE_PROMPT
+                    })
+                    iteration_count += 1
+                    continue
+
                 # Handle tool calls - Sequential tool calling logic
                 if tool_calls and execute_tool_fn:
                     # Convert tool_calls to a serializable format for all providers
                     serializable_tool_calls = self._serialize_tool_calls(tool_calls)
-
-
-
-
-
+                    # Check if this is Ollama provider
+                    if self._is_ollama_provider():
+                        # For Ollama, only include role and content
+                        messages.append({
+                            "role": "assistant",
+                            "content": response_text
+                        })
+                    else:
+                        # For other providers, include tool_calls
+                        messages.append({
+                            "role": "assistant",
+                            "content": response_text,
+                            "tool_calls": serializable_tool_calls
+                        })
 
                     should_continue = False
                     tool_results = [] # Store all tool results
@@ -842,11 +904,17 @@ class LLM:
                                logging.debug(f"[TOOL_EXEC_DEBUG] About to display tool call with message: {display_message}")
                                display_tool_call(display_message, console=console)
 
-
-
-
-
-
+                        # Check if this is Ollama provider
+                        if self._is_ollama_provider():
+                            # For Ollama, use user role and format as natural language
+                            messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+                        else:
+                            # For other providers, use tool role with tool_call_id
+                            messages.append({
+                                "role": "tool",
+                                "tool_call_id": tool_call_id,
+                                "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                            })
 
                         # Check if we should continue (for tools like sequential thinking)
                         # This mimics the logic from agent.py lines 1004-1007
@@ -858,6 +926,14 @@ class LLM:
                         iteration_count += 1
                         continue
 
+                    # For Ollama, add explicit prompt if we need a final answer
+                    if self._is_ollama_provider() and iteration_count > 0:
+                        # Add an explicit prompt for Ollama to generate the final answer
+                        messages.append({
+                            "role": "user",
+                            "content": self.OLLAMA_FINAL_ANSWER_PROMPT
+                        })
+
                     # After tool execution, continue the loop to check if more tools are needed
                     # instead of immediately trying to get a final response
                     iteration_count += 1
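Putting the Ollama-specific branches together, a single tool iteration now leaves the conversation in roughly the shape below before the next completion call. The tool name and result are invented for illustration; the final user message is the OLLAMA_FINAL_ANSWER_PROMPT constant added earlier:

```python
messages = [
    {"role": "user", "content": "What is the weather in Paris?"},
    # Ollama path: the assistant turn is recorded without a tool_calls field
    {"role": "assistant", "content": ""},
    # Tool result is replayed as natural language in a user turn
    {"role": "user", "content": "The get_weather function returned: {'temp_c': 21}"},
    # After the first iteration, an explicit nudge to produce the final answer
    {"role": "user", "content": "Based on the tool results above, please provide the final answer to the original question."},
]
```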
@@ -878,7 +954,7 @@ class LLM:
                         return final_response_text
 
                 # No tool calls were made in this iteration, return the response
-                if verbose:
+                if verbose and not interaction_displayed:
                     # If we have stored reasoning content from tool execution, display it
                     if stored_reasoning_content:
                         display_interaction(
@@ -896,6 +972,7 @@ class LLM:
                             generation_time=time.time() - start_time,
                             console=console
                         )
+                    interaction_displayed = True
 
                 response_text = response_text.strip() if response_text else ""
 
@@ -907,15 +984,17 @@ class LLM:
             if output_json or output_pydantic:
                 self.chat_history.append({"role": "user", "content": original_prompt})
                 self.chat_history.append({"role": "assistant", "content": response_text})
-                if verbose:
+                if verbose and not interaction_displayed:
                     display_interaction(original_prompt, response_text, markdown=markdown,
                                      generation_time=time.time() - start_time, console=console)
+                    interaction_displayed = True
                 return response_text
 
             if not self_reflect:
-                if verbose:
+                if verbose and not interaction_displayed:
                     display_interaction(original_prompt, response_text, markdown=markdown,
                                      generation_time=time.time() - start_time, console=console)
+                    interaction_displayed = True
                 # Return reasoning content if reasoning_steps is True
                 if reasoning_steps and stored_reasoning_content:
                     return stored_reasoning_content
@@ -944,6 +1023,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=False, # Force non-streaming
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
@@ -979,6 +1060,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=stream,
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
@@ -994,6 +1077,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=stream,
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
@@ -1011,15 +1096,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     )
 
                     if satisfactory and reflection_count >= min_reflect - 1:
-                        if verbose:
+                        if verbose and not interaction_displayed:
                             display_interaction(prompt, response_text, markdown=markdown,
                                              generation_time=time.time() - start_time, console=console)
+                            interaction_displayed = True
                         return response_text
 
                     if reflection_count >= max_reflect - 1:
-                        if verbose:
+                        if verbose and not interaction_displayed:
                             display_interaction(prompt, response_text, markdown=markdown,
                                              generation_time=time.time() - start_time, console=console)
+                            interaction_displayed = True
                         return response_text
 
                     reflection_count += 1
@@ -1039,6 +1126,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             messages=messages,
                             temperature=temperature,
                             stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -1053,21 +1142,24 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             messages=messages,
                             temperature=temperature,
                             stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
                         if chunk and chunk.choices and chunk.choices[0].delta.content:
                             response_text += chunk.choices[0].delta.content
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
                     continue
 
                 except json.JSONDecodeError:
                     reflection_count += 1
                     if reflection_count >= max_reflect:
-                        if verbose:
+                        if verbose and not interaction_displayed:
                             display_interaction(prompt, response_text, markdown=markdown,
                                              generation_time=time.time() - start_time, console=console)
+                            interaction_displayed = True
                         return response_text
                     continue
                 except Exception as e:
@@ -1075,9 +1167,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     return None
 
             # If we've exhausted reflection attempts
-            if verbose:
+            if verbose and not interaction_displayed:
                 display_interaction(prompt, response_text, markdown=markdown,
                                  generation_time=time.time() - start_time, console=console)
+                interaction_displayed = True
             return response_text
 
         except Exception as error:
@@ -1089,6 +1182,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
             total_time = time.time() - start_time
             logging.debug(f"get_response completed in {total_time:.2f} seconds")
 
+    def _is_gemini_model(self) -> bool:
+        """Check if the model is a Gemini model."""
+        if not self.model:
+            return False
+        return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
     async def get_response_async(
         self,
         prompt: Union[str, List[Dict]],
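The new helper is a plain substring check. A standalone sketch of the same test, with example model strings that are not taken from the package:

```python
def is_gemini_model(model: str) -> bool:
    """Mirrors the _is_gemini_model prefix check added above (illustrative only)."""
    if not model:
        return False
    return any(prefix in model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])

assert is_gemini_model("gemini/gemini-2.0-flash")
assert is_gemini_model("google/gemini-1.5-pro")
assert not is_gemini_model("gpt-4o-mini")
```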
@@ -1177,6 +1276,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
 
         start_time = time.time()
         reflection_count = 0
+        interaction_displayed = False # Track if interaction has been displayed
 
         # Format tools for LiteLLM using the shared helper
         formatted_tools = self._format_tools_for_litellm(tools)
@@ -1197,15 +1297,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 resp = await litellm.acompletion(
                     **self._build_completion_params(
                         messages=messages,
-
-
-
-
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        output_json=output_json,
+                        output_pydantic=output_pydantic,
+                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                    )
                 )
                 reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
                 response_text = resp["choices"][0]["message"]["content"]
 
-                if verbose and reasoning_content:
+                if verbose and reasoning_content and not interaction_displayed:
                     display_interaction(
                         "Initial reasoning:",
                         f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1213,7 +1315,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         generation_time=time.time() - start_time,
                         console=console
                     )
-
+                    interaction_displayed = True
+                elif verbose and not interaction_displayed:
                     display_interaction(
                         "Initial response:",
                         response_text,
@@ -1221,6 +1324,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         generation_time=time.time() - start_time,
                         console=console
                     )
+                    interaction_displayed = True
             else:
                 # Determine if we should use streaming based on tool support
                 use_streaming = stream
@@ -1239,6 +1343,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=True,
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -1259,6 +1365,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=True,
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -1271,7 +1379,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         if formatted_tools and self._supports_streaming_tools():
                             tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
 
                     # We already have tool_calls from streaming if supported
                     # No need for a second API call!
@@ -1283,13 +1391,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=False,
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
                     response_text = tool_response.choices[0].message.get("content", "")
                     tool_calls = tool_response.choices[0].message.get("tool_calls", [])
 
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         # Display the complete response at once
                         display_interaction(
                             original_prompt,
@@ -1298,16 +1408,35 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             generation_time=time.time() - start_time,
                             console=console
                         )
+                        interaction_displayed = True
 
+            # For Ollama, if response is empty but we have tools, prompt for tool usage
+            if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+                messages.append({
+                    "role": "user",
+                    "content": self.OLLAMA_TOOL_USAGE_PROMPT
+                })
+                iteration_count += 1
+                continue
+
             # Now handle tools if we have them (either from streaming or non-streaming)
             if tools and execute_tool_fn and tool_calls:
                 # Convert tool_calls to a serializable format for all providers
                 serializable_tool_calls = self._serialize_tool_calls(tool_calls)
-
-
-
-
-
+                # Check if it's Ollama provider
+                if self._is_ollama_provider():
+                    # For Ollama, only include role and content
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text
+                    })
+                else:
+                    # For other providers, include tool_calls
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text,
+                        "tool_calls": serializable_tool_calls
+                    })
 
                 tool_results = [] # Store all tool results
                 for tool_call in tool_calls:
@@ -1325,12 +1454,26 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                        else:
                            display_message += "Function returned no output"
                        display_tool_call(display_message, console=console)
+                    # Check if it's Ollama provider
+                    if self._is_ollama_provider():
+                        # For Ollama, use user role and format as natural language
+                        messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+                    else:
+                        # For other providers, use tool role with tool_call_id
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call_id,
+                            "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                        })
+
+                # For Ollama, add explicit prompt if we need a final answer
+                if self._is_ollama_provider() and iteration_count > 0:
+                    # Add an explicit prompt for Ollama to generate the final answer
                     messages.append({
-                        "role": "
-                        "
-                        "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                        "role": "user",
+                        "content": self.OLLAMA_FINAL_ANSWER_PROMPT
                     })
-
+
                 # Get response after tool calls
                 response_text = ""
 
@@ -1343,13 +1486,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=False, # force non-streaming
                             tools=formatted_tools, # Include tools
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
                     reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
                     response_text = resp["choices"][0]["message"]["content"]
 
-                    if verbose and reasoning_content:
+                    if verbose and reasoning_content and not interaction_displayed:
                         display_interaction(
                             "Tool response reasoning:",
                             f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1357,7 +1502,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             generation_time=time.time() - start_time,
                             console=console
                         )
-
+                        interaction_displayed = True
+                    elif verbose and not interaction_displayed:
                         display_interaction(
                             "Tool response:",
                             response_text,
@@ -1365,6 +1511,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             generation_time=time.time() - start_time,
                             console=console
                         )
+                        interaction_displayed = True
                 else:
                     # Get response after tool calls with streaming if not already handled
                     if verbose:
@@ -1374,6 +1521,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=stream,
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
@@ -1389,13 +1538,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             messages=messages,
                             temperature=temperature,
                             stream=stream,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
                         if chunk and chunk.choices and chunk.choices[0].delta.content:
                             response_text += chunk.choices[0].delta.content
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
 
             # After tool execution, update messages and continue the loop
             if response_text:
@@ -1422,9 +1573,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         if output_json or output_pydantic:
             self.chat_history.append({"role": "user", "content": original_prompt})
             self.chat_history.append({"role": "assistant", "content": response_text})
-            if verbose:
+            if verbose and not interaction_displayed:
                 display_interaction(original_prompt, response_text, markdown=markdown,
                                  generation_time=time.time() - start_time, console=console)
+                interaction_displayed = True
             return response_text
 
         if not self_reflect:
@@ -1432,7 +1584,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
             display_text = final_response_text if final_response_text else response_text
 
             # Display with stored reasoning content if available
-            if verbose:
+            if verbose and not interaction_displayed:
                 if stored_reasoning_content:
                     display_interaction(
                         original_prompt,
@@ -1444,6 +1596,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 else:
                     display_interaction(original_prompt, display_text, markdown=markdown,
                                      generation_time=time.time() - start_time, console=console)
+                interaction_displayed = True
 
             # Return reasoning content if reasoning_steps is True and we have it
             if reasoning_steps and stored_reasoning_content:
@@ -1471,6 +1624,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=False, # Force non-streaming
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
@@ -1506,6 +1661,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=stream,
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
@@ -1521,6 +1678,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=stream,
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     ):
@@ -1539,15 +1698,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     )
 
                     if satisfactory and reflection_count >= min_reflect - 1:
-                        if verbose:
+                        if verbose and not interaction_displayed:
                             display_interaction(prompt, response_text, markdown=markdown,
                                              generation_time=time.time() - start_time, console=console)
+                            interaction_displayed = True
                         return response_text
 
                     if reflection_count >= max_reflect - 1:
-                        if verbose:
+                        if verbose and not interaction_displayed:
                             display_interaction(prompt, response_text, markdown=markdown,
                                              generation_time=time.time() - start_time, console=console)
+                            interaction_displayed = True
                         return response_text
 
                     reflection_count += 1
@@ -1678,11 +1839,33 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         # Override with any provided parameters
         params.update(override_params)
 
+        # Handle structured output parameters
+        output_json = override_params.get('output_json')
+        output_pydantic = override_params.get('output_pydantic')
+
+        if output_json or output_pydantic:
+            # Always remove these from params as they're not native litellm parameters
+            params.pop('output_json', None)
+            params.pop('output_pydantic', None)
+
+            # Check if this is a Gemini model that supports native structured outputs
+            if self._is_gemini_model():
+                from .model_capabilities import supports_structured_outputs
+                schema_model = output_json or output_pydantic
+
+                if schema_model and hasattr(schema_model, 'model_json_schema') and supports_structured_outputs(self.model):
+                    schema = schema_model.model_json_schema()
+
+                    # Gemini uses response_mime_type and response_schema
+                    params['response_mime_type'] = 'application/json'
+                    params['response_schema'] = schema
+
+                    logging.debug(f"Using Gemini native structured output with schema: {json.dumps(schema, indent=2)}")
+
         # Add tool_choice="auto" when tools are provided (unless already specified)
         if 'tools' in params and params['tools'] and 'tool_choice' not in params:
             # For Gemini models, use tool_choice to encourage tool usage
-
-            if any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini']):
+            if self._is_gemini_model():
                 try:
                     import litellm
                     # Check if model supports function calling before setting tool_choice
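In practice, the branch above means a request carrying `output_pydantic` against a structured-output-capable Gemini model ends up with completion parameters shaped roughly like the sketch below. The model name, schema, and other values are illustrative; `output_json`/`output_pydantic` themselves are stripped because they are not native litellm parameters:

```python
from pydantic import BaseModel

class Answer(BaseModel):  # hypothetical schema for illustration
    title: str
    score: float

# Approximate shape of the params passed on to litellm for a Gemini model
params = {
    "model": "gemini/gemini-2.0-flash",
    "messages": [{"role": "user", "content": "Summarize the article as JSON."}],
    "temperature": 0.7,
    # Added by the Gemini branch above:
    "response_mime_type": "application/json",
    "response_schema": Answer.model_json_schema(),
}
```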
|