praisonaiagents 0.0.127__py3-none-any.whl → 0.0.129__py3-none-any.whl
This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- praisonaiagents/agent/__init__.py +2 -1
- praisonaiagents/agent/router_agent.py +334 -0
- praisonaiagents/agents/agents.py +15 -17
- praisonaiagents/agents/autoagents.py +1 -1
- praisonaiagents/llm/__init__.py +11 -1
- praisonaiagents/llm/llm.py +240 -274
- praisonaiagents/llm/model_capabilities.py +20 -3
- praisonaiagents/llm/model_router.py +348 -0
- praisonaiagents/process/process.py +71 -61
- praisonaiagents/task/task.py +17 -4
- {praisonaiagents-0.0.127.dist-info → praisonaiagents-0.0.129.dist-info}/METADATA +1 -1
- {praisonaiagents-0.0.127.dist-info → praisonaiagents-0.0.129.dist-info}/RECORD +14 -12
- {praisonaiagents-0.0.127.dist-info → praisonaiagents-0.0.129.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.127.dist-info → praisonaiagents-0.0.129.dist-info}/top_level.txt +0 -0
praisonaiagents/llm/llm.py
CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 import warnings
+import re
 from typing import Any, Dict, List, Optional, Union, Literal, Callable
 from pydantic import BaseModel
 import time
@@ -87,6 +88,10 @@ class LLM:
         "llama-3.2-90b-text-preview": 6144 # 8,192 actual
     }
 
+    # Ollama-specific prompt constants
+    OLLAMA_TOOL_USAGE_PROMPT = "Please analyze the request and use the available tools to help answer the question. Start by identifying what information you need."
+    OLLAMA_FINAL_ANSWER_PROMPT = "Based on the tool results above, please provide the final answer to the original question."
+
     def _log_llm_config(self, method_name: str, **config):
         """Centralized debug logging for LLM configuration and parameters.
 
@@ -277,15 +282,32 @@ class LLM:
         # Direct ollama/ prefix
         if self.model.startswith("ollama/"):
             return True
+
+        # Check base_url if provided
+        if self.base_url and "ollama" in self.base_url.lower():
+            return True
 
         # Check environment variables for Ollama base URL
         base_url = os.getenv("OPENAI_BASE_URL", "")
         api_base = os.getenv("OPENAI_API_BASE", "")
 
-        # Common Ollama endpoints
-
+        # Common Ollama endpoints (including custom ports)
+        if any(url and ("ollama" in url.lower() or ":11434" in url)
+               for url in [base_url, api_base, self.base_url or ""]):
+            return True
 
-        return
+        return False
+
+    def _format_ollama_tool_result_message(self, function_name: str, tool_result: Any) -> Dict[str, str]:
+        """
+        Format tool result message for Ollama provider.
+        Simplified approach without hardcoded regex extraction.
+        """
+        tool_result_str = str(tool_result)
+        return {
+            "role": "user",
+            "content": f"The {function_name} function returned: {tool_result_str}"
+        }
 
     def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
         """
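For reference, the new detection treats a configured endpoint as Ollama when the URL mentions "ollama" or uses the default Ollama port 11434. A minimal standalone sketch of the same check (the function name and example values are illustrative, not part of the package API):

    import os
    from typing import Optional

    def looks_like_ollama(model: str, base_url: Optional[str] = None) -> bool:
        # Explicit provider prefix
        if model.startswith("ollama/"):
            return True
        # Explicit base_url mentioning ollama
        if base_url and "ollama" in base_url.lower():
            return True
        # Environment-configured endpoints, including custom hosts on port 11434
        candidates = [os.getenv("OPENAI_BASE_URL", ""), os.getenv("OPENAI_API_BASE", ""), base_url or ""]
        return any(url and ("ollama" in url.lower() or ":11434" in url) for url in candidates)

    print(looks_like_ollama("llama3.2", base_url="http://localhost:11434"))  # True
    print(looks_like_ollama("gpt-4o-mini"))  # False unless env vars point at an Ollama endpoint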
@@ -422,13 +444,22 @@ class LLM:
         """
         messages = []
 
+        # Check if this is a Gemini model that supports native structured outputs
+        is_gemini_with_structured_output = False
+        if output_json or output_pydantic:
+            from .model_capabilities import supports_structured_outputs
+            is_gemini_with_structured_output = (
+                self._is_gemini_model() and
+                supports_structured_outputs(self.model)
+            )
+
         # Handle system prompt
         if system_prompt:
-            #
-            if output_json:
-
-
-
+            # Only append JSON schema for non-Gemini models or Gemini models without structured output support
+            if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+                schema_model = output_json or output_pydantic
+                if schema_model and hasattr(schema_model, 'model_json_schema'):
+                    system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(schema_model.model_json_schema())}"
 
         # Skip system messages for legacy o1 models as they don't support them
         if not self._needs_system_message_skip():
@@ -440,7 +471,8 @@ class LLM:
 
         # Handle prompt modifications for JSON output
         original_prompt = prompt
-        if output_json or output_pydantic:
+        if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+            # Only modify prompt for non-Gemini models
             if isinstance(prompt, str):
                 prompt = prompt + "\nReturn ONLY a valid JSON object. No other text or explanation."
             elif isinstance(prompt, list):
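For models without native structured-output support, the schema still travels as plain text appended to the system prompt and the user prompt. A rough sketch of the resulting instruction for a Pydantic model (the model class here is just an example):

    import json
    from pydantic import BaseModel

    class Answer(BaseModel):  # example schema, stands in for output_pydantic
        title: str
        score: float

    system_prompt = "You are a helpful assistant."
    schema_model = Answer

    if hasattr(schema_model, "model_json_schema"):
        # Same instruction the diff appends for non-Gemini models
        system_prompt += (
            "\nReturn ONLY a JSON object that matches this Pydantic model: "
            + json.dumps(schema_model.model_json_schema())
        )

    user_prompt = "Rate this release." + "\nReturn ONLY a valid JSON object. No other text or explanation."
    print(system_prompt)
    print(user_prompt)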
@@ -660,6 +692,7 @@ class LLM:
 
         start_time = time.time()
         reflection_count = 0
+        interaction_displayed = False  # Track if interaction has been displayed
 
         # Display initial instruction once
         if verbose:
@@ -695,6 +728,8 @@ class LLM:
                             temperature=temperature,
                             stream=False, # force non-streaming
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
@@ -703,7 +738,7 @@ class LLM:
                     final_response = resp
 
                     # Optionally display reasoning if present
-                    if verbose and reasoning_content:
+                    if verbose and reasoning_content and not interaction_displayed:
                         display_interaction(
                             original_prompt,
                             f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -711,7 +746,8 @@ class LLM:
                             generation_time=time.time() - current_time,
                             console=console
                         )
-
+                        interaction_displayed = True
+                    elif verbose and not interaction_displayed:
                         display_interaction(
                             original_prompt,
                             response_text,
@@ -719,6 +755,7 @@ class LLM:
                             generation_time=time.time() - current_time,
                             console=console
                         )
+                        interaction_displayed = True
 
                 # Otherwise do the existing streaming approach
                 else:
@@ -741,6 +778,8 @@ class LLM:
                                     tools=formatted_tools,
                                     temperature=temperature,
                                     stream=True,
+                                    output_json=output_json,
+                                    output_pydantic=output_pydantic,
                                     **kwargs
                                 )
                             ):
@@ -760,6 +799,8 @@ class LLM:
                                     tools=formatted_tools,
                                     temperature=temperature,
                                     stream=True,
+                                    output_json=output_json,
+                                    output_pydantic=output_pydantic,
                                     **kwargs
                                 )
                             ):
@@ -772,7 +813,7 @@ class LLM:
                                 if formatted_tools and self._supports_streaming_tools():
                                     tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
 
                     # Create a mock final_response with the captured data
                     final_response = {
|
@@ -791,12 +832,14 @@ class LLM:
|
|
791
832
|
tools=formatted_tools,
|
792
833
|
temperature=temperature,
|
793
834
|
stream=False,
|
835
|
+
output_json=output_json,
|
836
|
+
output_pydantic=output_pydantic,
|
794
837
|
**kwargs
|
795
838
|
)
|
796
839
|
)
|
797
840
|
response_text = final_response["choices"][0]["message"]["content"]
|
798
841
|
|
799
|
-
if verbose:
|
842
|
+
if verbose and not interaction_displayed:
|
800
843
|
# Display the complete response at once
|
801
844
|
display_interaction(
|
802
845
|
original_prompt,
|
@@ -805,18 +848,37 @@ class LLM:
|
|
805
848
|
generation_time=time.time() - current_time,
|
806
849
|
console=console
|
807
850
|
)
|
851
|
+
interaction_displayed = True
|
808
852
|
|
809
853
|
tool_calls = final_response["choices"][0]["message"].get("tool_calls")
|
810
854
|
|
855
|
+
# For Ollama, if response is empty but we have tools, prompt for tool usage
|
856
|
+
if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
|
857
|
+
messages.append({
|
858
|
+
"role": "user",
|
859
|
+
"content": self.OLLAMA_TOOL_USAGE_PROMPT
|
860
|
+
})
|
861
|
+
iteration_count += 1
|
862
|
+
continue
|
863
|
+
|
811
864
|
# Handle tool calls - Sequential tool calling logic
|
812
865
|
if tool_calls and execute_tool_fn:
|
813
866
|
# Convert tool_calls to a serializable format for all providers
|
814
867
|
serializable_tool_calls = self._serialize_tool_calls(tool_calls)
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
868
|
+
# Check if this is Ollama provider
|
869
|
+
if self._is_ollama_provider():
|
870
|
+
# For Ollama, only include role and content
|
871
|
+
messages.append({
|
872
|
+
"role": "assistant",
|
873
|
+
"content": response_text
|
874
|
+
})
|
875
|
+
else:
|
876
|
+
# For other providers, include tool_calls
|
877
|
+
messages.append({
|
878
|
+
"role": "assistant",
|
879
|
+
"content": response_text,
|
880
|
+
"tool_calls": serializable_tool_calls
|
881
|
+
})
|
820
882
|
|
821
883
|
should_continue = False
|
822
884
|
tool_results = [] # Store all tool results
|
@@ -842,11 +904,17 @@ class LLM:
                                 logging.debug(f"[TOOL_EXEC_DEBUG] About to display tool call with message: {display_message}")
                                 display_tool_call(display_message, console=console)
 
-
-
-
-
-
+                        # Check if this is Ollama provider
+                        if self._is_ollama_provider():
+                            # For Ollama, use user role and format as natural language
+                            messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+                        else:
+                            # For other providers, use tool role with tool_call_id
+                            messages.append({
+                                "role": "tool",
+                                "tool_call_id": tool_call_id,
+                                "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                            })
 
                         # Check if we should continue (for tools like sequential thinking)
                         # This mimics the logic from agent.py lines 1004-1007
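The practical effect of this branch is that Ollama receives tool output as an ordinary user turn, while OpenAI-compatible providers keep the tool role tied back to the originating call. A hedged sketch of both message shapes (all values are placeholders):

    import json

    function_name = "get_weather"   # example tool name
    tool_call_id = "call_0"         # example id issued by the provider
    tool_result = {"temp_c": 21}    # example tool output

    # Ollama path: natural-language user message built by _format_ollama_tool_result_message
    ollama_message = {
        "role": "user",
        "content": f"The {function_name} function returned: {tool_result}",
    }

    # Default path: tool role with the tool_call_id so the provider can match the call
    standard_message = {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output",
    }

    print(ollama_message)
    print(standard_message)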
@@ -858,100 +926,12 @@ class LLM:
                         iteration_count += 1
                         continue
 
-                    #
-
-
-
-                    if ollama_params:
-                        # Get response based on streaming mode
-                        if stream:
-                            # Streaming approach
-                            if verbose:
-                                with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
-                                    response_text = ""
-                                    for chunk in litellm.completion(
-                                        **self._build_completion_params(
-                                            messages=ollama_params["follow_up_messages"],
-                                            temperature=temperature,
-                                            stream=True
-                                        )
-                                    ):
-                                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                            content = chunk.choices[0].delta.content
-                                            response_text += content
-                                            live.update(display_generating(response_text, start_time))
-                            else:
-                                response_text = ""
-                                for chunk in litellm.completion(
-                                    **self._build_completion_params(
-                                        messages=ollama_params["follow_up_messages"],
-                                        temperature=temperature,
-                                        stream=True
-                                    )
-                                ):
-                                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                        response_text += chunk.choices[0].delta.content
-                        else:
-                            # Non-streaming approach
-                            resp = litellm.completion(
-                                **self._build_completion_params(
-                                    messages=ollama_params["follow_up_messages"],
-                                    temperature=temperature,
-                                    stream=False
-                                )
-                            )
-                            response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
-
-                        # Set flag to indicate Ollama was handled
-                        ollama_handled = True
-                        final_response_text = response_text.strip() if response_text else ""
-                        logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
-
-                        # Display the response if we got one
-                        if final_response_text and verbose:
-                            display_interaction(
-                                ollama_params["original_prompt"],
-                                final_response_text,
-                                markdown=markdown,
-                                generation_time=time.time() - start_time,
-                                console=console
-                            )
-
-                        # Update messages and continue the loop instead of returning
-                        if final_response_text:
-                            # Update messages with the response to maintain conversation context
-                            messages.append({
-                                "role": "assistant",
-                                "content": final_response_text
-                            })
-                            # Continue the loop to check if more tools are needed
-                            iteration_count += 1
-                            continue
-                        else:
-                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
-
-                    # Handle reasoning_steps after tool execution if not already handled by Ollama
-                    if reasoning_steps and not ollama_handled:
-                        # Make a non-streaming call to capture reasoning content
-                        reasoning_resp = litellm.completion(
-                            **self._build_completion_params(
-                                messages=messages,
-                                temperature=temperature,
-                                stream=False, # force non-streaming
-                                **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                            )
-                        )
-                        reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                        response_text = reasoning_resp["choices"][0]["message"]["content"]
-
-                        # Store reasoning content for later use
-                        if reasoning_content:
-                            stored_reasoning_content = reasoning_content
-
-                        # Update messages with the response
+                    # For Ollama, add explicit prompt if we need a final answer
+                    if self._is_ollama_provider() and iteration_count > 0:
+                        # Add an explicit prompt for Ollama to generate the final answer
                         messages.append({
-                            "role": "
-                            "content":
+                            "role": "user",
+                            "content": self.OLLAMA_FINAL_ANSWER_PROMPT
                         })
 
                     # After tool execution, continue the loop to check if more tools are needed
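The removed block issued a separate follow-up litellm call whenever Ollama echoed back only a tool-call JSON; the replacement stays in the main loop and nudges the model with the two class-level prompt constants instead. A condensed, self-contained sketch of that control flow with a stubbed chat function (not the package's actual loop):

    OLLAMA_TOOL_USAGE_PROMPT = "Please analyze the request and use the available tools to help answer the question. Start by identifying what information you need."
    OLLAMA_FINAL_ANSWER_PROMPT = "Based on the tool results above, please provide the final answer to the original question."

    def fake_ollama_chat(messages):
        # Stand-in for the completion call: empty until the final-answer prompt appears.
        if any(m["content"] == OLLAMA_FINAL_ANSWER_PROMPT for m in messages):
            return "It is 21 degrees Celsius in Paris."
        return ""

    messages = [{"role": "user", "content": "What is the weather in Paris?"}]
    iteration_count = 0
    reply = ""

    while iteration_count < 5:
        reply = fake_ollama_chat(messages)
        if not reply.strip():
            if iteration_count == 0:
                # Empty first reply while tools are available: ask the model to use them.
                messages.append({"role": "user", "content": OLLAMA_TOOL_USAGE_PROMPT})
            else:
                # Tool results are already in the history: ask explicitly for the final answer.
                messages.append({"role": "user", "content": OLLAMA_FINAL_ANSWER_PROMPT})
            iteration_count += 1
            continue
        break  # got a real answer

    print(reply)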
@@ -974,7 +954,7 @@ class LLM:
                         return final_response_text
 
                 # No tool calls were made in this iteration, return the response
-                if verbose:
+                if verbose and not interaction_displayed:
                     # If we have stored reasoning content from tool execution, display it
                     if stored_reasoning_content:
                         display_interaction(
@@ -992,6 +972,7 @@ class LLM:
                             generation_time=time.time() - start_time,
                             console=console
                         )
+                    interaction_displayed = True
 
                 response_text = response_text.strip() if response_text else ""
 
@@ -1003,15 +984,17 @@ class LLM:
                 if output_json or output_pydantic:
                     self.chat_history.append({"role": "user", "content": original_prompt})
                     self.chat_history.append({"role": "assistant", "content": response_text})
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         display_interaction(original_prompt, response_text, markdown=markdown,
                                             generation_time=time.time() - start_time, console=console)
+                        interaction_displayed = True
                     return response_text
 
                 if not self_reflect:
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         display_interaction(original_prompt, response_text, markdown=markdown,
                                             generation_time=time.time() - start_time, console=console)
+                        interaction_displayed = True
                     # Return reasoning content if reasoning_steps is True
                     if reasoning_steps and stored_reasoning_content:
                         return stored_reasoning_content
@@ -1040,6 +1023,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=False, # Force non-streaming
                             response_format={"type": "json_object"},
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                         )
                     )
@@ -1075,6 +1060,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                 temperature=temperature,
                                 stream=stream,
                                 response_format={"type": "json_object"},
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
                                 **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                             )
                         ):
@@ -1090,6 +1077,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                 temperature=temperature,
                                 stream=stream,
                                 response_format={"type": "json_object"},
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
                                 **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                             )
                         ):
@@ -1107,15 +1096,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     )
 
                 if satisfactory and reflection_count >= min_reflect - 1:
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         display_interaction(prompt, response_text, markdown=markdown,
                                             generation_time=time.time() - start_time, console=console)
+                        interaction_displayed = True
                     return response_text
 
                 if reflection_count >= max_reflect - 1:
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         display_interaction(prompt, response_text, markdown=markdown,
                                             generation_time=time.time() - start_time, console=console)
+                        interaction_displayed = True
                     return response_text
 
                 reflection_count += 1
@@ -1135,6 +1126,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                 messages=messages,
                                 temperature=temperature,
                                 stream=True,
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
                                 **kwargs
                             )
                         ):
@@ -1149,21 +1142,24 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                 messages=messages,
                                 temperature=temperature,
                                 stream=True,
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
                                 **kwargs
                             )
                         ):
                             if chunk and chunk.choices and chunk.choices[0].delta.content:
                                 response_text += chunk.choices[0].delta.content
 
-                        response_text = response_text.strip() if response_text else ""
+                    response_text = response_text.strip() if response_text else ""
                     continue
 
             except json.JSONDecodeError:
                 reflection_count += 1
                 if reflection_count >= max_reflect:
-                    if verbose:
+                    if verbose and not interaction_displayed:
                         display_interaction(prompt, response_text, markdown=markdown,
                                             generation_time=time.time() - start_time, console=console)
+                        interaction_displayed = True
                     return response_text
                 continue
             except Exception as e:
@@ -1171,9 +1167,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 return None
 
         # If we've exhausted reflection attempts
-        if verbose:
+        if verbose and not interaction_displayed:
             display_interaction(prompt, response_text, markdown=markdown,
                                 generation_time=time.time() - start_time, console=console)
+            interaction_displayed = True
        return response_text
 
     except Exception as error:
@@ -1185,6 +1182,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
            total_time = time.time() - start_time
            logging.debug(f"get_response completed in {total_time:.2f} seconds")
 
+    def _is_gemini_model(self) -> bool:
+        """Check if the model is a Gemini model."""
+        if not self.model:
+            return False
+        return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
     async def get_response_async(
         self,
         prompt: Union[str, List[Dict]],
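The helper above is a simple prefix check over the model string. A quick illustration of which names it accepts (example model strings only):

    def is_gemini_model(model):
        # Mirrors LLM._is_gemini_model from the diff
        if not model:
            return False
        return any(prefix in model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])

    for name in ["gemini/gemini-2.0-flash", "google/gemini-1.5-pro", "gpt-4o-mini", None]:
        print(name, is_gemini_model(name))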
@@ -1273,6 +1276,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
 
         start_time = time.time()
         reflection_count = 0
+        interaction_displayed = False  # Track if interaction has been displayed
 
         # Format tools for LiteLLM using the shared helper
         formatted_tools = self._format_tools_for_litellm(tools)
@@ -1293,15 +1297,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                 resp = await litellm.acompletion(
                     **self._build_completion_params(
                         messages=messages,
-
-
-
-
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        output_json=output_json,
+                        output_pydantic=output_pydantic,
+                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                    )
                 )
                 reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
                 response_text = resp["choices"][0]["message"]["content"]
 
-                if verbose and reasoning_content:
+                if verbose and reasoning_content and not interaction_displayed:
                     display_interaction(
                         "Initial reasoning:",
                         f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
|
|
1309
1315
|
generation_time=time.time() - start_time,
|
1310
1316
|
console=console
|
1311
1317
|
)
|
1312
|
-
|
1318
|
+
interaction_displayed = True
|
1319
|
+
elif verbose and not interaction_displayed:
|
1313
1320
|
display_interaction(
|
1314
1321
|
"Initial response:",
|
1315
1322
|
response_text,
|
@@ -1317,6 +1324,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
|
|
1317
1324
|
generation_time=time.time() - start_time,
|
1318
1325
|
console=console
|
1319
1326
|
)
|
1327
|
+
interaction_displayed = True
|
1320
1328
|
else:
|
1321
1329
|
# Determine if we should use streaming based on tool support
|
1322
1330
|
use_streaming = stream
|
@@ -1335,6 +1343,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
|
|
1335
1343
|
temperature=temperature,
|
1336
1344
|
stream=True,
|
1337
1345
|
tools=formatted_tools,
|
1346
|
+
output_json=output_json,
|
1347
|
+
output_pydantic=output_pydantic,
|
1338
1348
|
**kwargs
|
1339
1349
|
)
|
1340
1350
|
):
|
@@ -1355,6 +1365,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                             temperature=temperature,
                             stream=True,
                             tools=formatted_tools,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
                             **kwargs
                         )
                     ):
@@ -1367,7 +1379,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         if formatted_tools and self._supports_streaming_tools():
                             tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
 
-                    response_text = response_text.strip() if response_text else ""
+                response_text = response_text.strip() if response_text else ""
 
                 # We already have tool_calls from streaming if supported
                 # No need for a second API call!
@@ -1379,13 +1391,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         temperature=temperature,
                         stream=False,
                         tools=formatted_tools,
+                        output_json=output_json,
+                        output_pydantic=output_pydantic,
                         **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                     )
                 )
                 response_text = tool_response.choices[0].message.get("content", "")
                 tool_calls = tool_response.choices[0].message.get("tool_calls", [])
 
-                if verbose:
+                if verbose and not interaction_displayed:
                     # Display the complete response at once
                     display_interaction(
                         original_prompt,
@@ -1394,16 +1408,35 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                         generation_time=time.time() - start_time,
                         console=console
                     )
+                    interaction_displayed = True
 
+            # For Ollama, if response is empty but we have tools, prompt for tool usage
+            if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+                messages.append({
+                    "role": "user",
+                    "content": self.OLLAMA_TOOL_USAGE_PROMPT
+                })
+                iteration_count += 1
+                continue
+
             # Now handle tools if we have them (either from streaming or non-streaming)
             if tools and execute_tool_fn and tool_calls:
                 # Convert tool_calls to a serializable format for all providers
                 serializable_tool_calls = self._serialize_tool_calls(tool_calls)
-
-
-
-
-
+                # Check if it's Ollama provider
+                if self._is_ollama_provider():
+                    # For Ollama, only include role and content
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text
+                    })
+                else:
+                    # For other providers, include tool_calls
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text,
+                        "tool_calls": serializable_tool_calls
+                    })
 
                 tool_results = []  # Store all tool results
                 for tool_call in tool_calls:
@@ -1421,77 +1454,31 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     else:
                         display_message += "Function returned no output"
                     display_tool_call(display_message, console=console)
+                    # Check if it's Ollama provider
+                    if self._is_ollama_provider():
+                        # For Ollama, use user role and format as natural language
+                        messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+                    else:
+                        # For other providers, use tool role with tool_call_id
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call_id,
+                            "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                        })
+
+                # For Ollama, add explicit prompt if we need a final answer
+                if self._is_ollama_provider() and iteration_count > 0:
+                    # Add an explicit prompt for Ollama to generate the final answer
                     messages.append({
-                        "role": "
-                        "
-                        "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+                        "role": "user",
+                        "content": self.OLLAMA_FINAL_ANSWER_PROMPT
                     })
-
+
                 # Get response after tool calls
                 response_text = ""
 
-                #
-
-                ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
-
-                if ollama_params:
-                    # Get response with streaming
-                    if verbose:
-                        response_text = ""
-                        async for chunk in await litellm.acompletion(
-                            **self._build_completion_params(
-                                messages=ollama_params["follow_up_messages"],
-                                temperature=temperature,
-                                stream=stream
-                            )
-                        ):
-                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                content = chunk.choices[0].delta.content
-                                response_text += content
-                                print("\033[K", end="\r")
-                                print(f"Processing results... {time.time() - start_time:.1f}s", end="\r")
-                    else:
-                        response_text = ""
-                        async for chunk in await litellm.acompletion(
-                            **self._build_completion_params(
-                                messages=ollama_params["follow_up_messages"],
-                                temperature=temperature,
-                                stream=stream
-                            )
-                        ):
-                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                response_text += chunk.choices[0].delta.content
-
-                    # Set flag to indicate Ollama was handled
-                    ollama_handled = True
-                    final_response_text = response_text.strip()
-                    logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
-
-                    # Display the response if we got one
-                    if final_response_text and verbose:
-                        display_interaction(
-                            ollama_params["original_prompt"],
-                            final_response_text,
-                            markdown=markdown,
-                            generation_time=time.time() - start_time,
-                            console=console
-                        )
-
-                    # Store the response for potential final return
-                    if final_response_text:
-                        # Update messages with the response to maintain conversation context
-                        messages.append({
-                            "role": "assistant",
-                            "content": final_response_text
-                        })
-                        # Continue the loop to check if more tools are needed
-                        iteration_count += 1
-                        continue
-                    else:
-                        logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
-
-                # If no special handling was needed or if it's not an Ollama model
-                if reasoning_steps and not ollama_handled:
+                # If no special handling was needed
+                if reasoning_steps:
                     # Non-streaming call to capture reasoning
                     resp = await litellm.acompletion(
                         **self._build_completion_params(
@@ -1499,13 +1486,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                            temperature=temperature,
                            stream=False, # force non-streaming
                            tools=formatted_tools, # Include tools
+                           output_json=output_json,
+                           output_pydantic=output_pydantic,
                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                        )
                    )
                    reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
                    response_text = resp["choices"][0]["message"]["content"]
 
-                   if verbose and reasoning_content:
+                   if verbose and reasoning_content and not interaction_displayed:
                        display_interaction(
                            "Tool response reasoning:",
                            f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1513,7 +1502,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                            generation_time=time.time() - start_time,
                            console=console
                        )
-
+                       interaction_displayed = True
+                   elif verbose and not interaction_displayed:
                        display_interaction(
                            "Tool response:",
                            response_text,
@@ -1521,7 +1511,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                            generation_time=time.time() - start_time,
                            console=console
                        )
-
+                       interaction_displayed = True
+               else:
                    # Get response after tool calls with streaming if not already handled
                    if verbose:
                        async for chunk in await litellm.acompletion(
@@ -1530,6 +1521,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                temperature=temperature,
                                stream=stream,
                                tools=formatted_tools,
+                               output_json=output_json,
+                               output_pydantic=output_pydantic,
                                **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                            )
                        ):
@@ -1545,13 +1538,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                                messages=messages,
                                temperature=temperature,
                                stream=stream,
+                               output_json=output_json,
+                               output_pydantic=output_pydantic,
                                **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                            )
                        ):
                            if chunk and chunk.choices and chunk.choices[0].delta.content:
                                response_text += chunk.choices[0].delta.content
 
-                       response_text = response_text.strip() if response_text else ""
+               response_text = response_text.strip() if response_text else ""
 
                # After tool execution, update messages and continue the loop
                if response_text:
@@ -1578,9 +1573,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
            if output_json or output_pydantic:
                self.chat_history.append({"role": "user", "content": original_prompt})
                self.chat_history.append({"role": "assistant", "content": response_text})
-               if verbose:
+               if verbose and not interaction_displayed:
                    display_interaction(original_prompt, response_text, markdown=markdown,
                                        generation_time=time.time() - start_time, console=console)
+                   interaction_displayed = True
                return response_text
 
            if not self_reflect:
@@ -1588,7 +1584,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                display_text = final_response_text if final_response_text else response_text
 
                # Display with stored reasoning content if available
-               if verbose:
+               if verbose and not interaction_displayed:
                    if stored_reasoning_content:
                        display_interaction(
                            original_prompt,
@@ -1600,6 +1596,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                    else:
                        display_interaction(original_prompt, display_text, markdown=markdown,
                                            generation_time=time.time() - start_time, console=console)
+                   interaction_displayed = True
 
                # Return reasoning content if reasoning_steps is True and we have it
                if reasoning_steps and stored_reasoning_content:
@@ -1627,6 +1624,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                        temperature=temperature,
                        stream=False, # Force non-streaming
                        response_format={"type": "json_object"},
+                       output_json=output_json,
+                       output_pydantic=output_pydantic,
                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                    )
                )
@@ -1662,6 +1661,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                            temperature=temperature,
                            stream=stream,
                            response_format={"type": "json_object"},
+                           output_json=output_json,
+                           output_pydantic=output_pydantic,
                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                        )
                    ):
@@ -1677,6 +1678,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                            temperature=temperature,
                            stream=stream,
                            response_format={"type": "json_object"},
+                           output_json=output_json,
+                           output_pydantic=output_pydantic,
                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                        )
                    ):
@@ -1695,15 +1698,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                )
 
                if satisfactory and reflection_count >= min_reflect - 1:
-                   if verbose:
+                   if verbose and not interaction_displayed:
                        display_interaction(prompt, response_text, markdown=markdown,
                                            generation_time=time.time() - start_time, console=console)
+                       interaction_displayed = True
                    return response_text
 
                if reflection_count >= max_reflect - 1:
-                   if verbose:
+                   if verbose and not interaction_displayed:
                        display_interaction(prompt, response_text, markdown=markdown,
                                            generation_time=time.time() - start_time, console=console)
+                       interaction_displayed = True
                    return response_text
 
                reflection_count += 1
@@ -1790,67 +1795,6 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
 
        litellm.callbacks = events
 
-    def _handle_ollama_model(self, response_text: str, tool_results: List[Any], messages: List[Dict], original_prompt: Union[str, List[Dict]]) -> Optional[Dict[str, Any]]:
-        """
-        Handle special Ollama model requirements when processing tool results.
-
-        Args:
-            response_text: The initial response text from the model
-            tool_results: List of tool execution results
-            messages: The conversation messages list
-            original_prompt: The original user prompt
-
-        Returns:
-            Dict with follow-up parameters if Ollama needs special handling, None otherwise
-        """
-        if not self._is_ollama_provider() or not tool_results:
-            return None
-
-        # Check if the response is just a JSON tool call
-        try:
-            json_response = json.loads(response_text.strip() if response_text else "{}")
-            if not (('name' in json_response or 'function' in json_response) and
-                    not any(word in response_text.lower() for word in ['summary', 'option', 'result', 'found'])):
-                return None
-
-            logging.debug("Detected Ollama returning only tool call JSON, preparing follow-up call to process results")
-
-            # Extract the original user query from messages
-            original_query = ""
-            for msg in reversed(messages): # Look from the end to find the most recent user message
-                if msg.get("role") == "user":
-                    content = msg.get("content", "")
-                    # Handle list content (multimodal)
-                    if isinstance(content, list):
-                        for item in content:
-                            if isinstance(item, dict) and item.get("type") == "text":
-                                original_query = item.get("text", "")
-                                break
-                    else:
-                        original_query = content
-                    if original_query:
-                        break
-
-            # Create a shorter follow-up prompt with all tool results
-            # If there's only one result, use it directly; otherwise combine them
-            if len(tool_results) == 1:
-                results_text = json.dumps(tool_results[0], indent=2)
-            else:
-                results_text = json.dumps(tool_results, indent=2)
-
-            follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
-            logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
-            logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
-
-            # Return parameters for follow-up call
-            return {
-                "follow_up_messages": [{"role": "user", "content": follow_up_prompt}],
-                "original_prompt": original_prompt
-            }
-
-        except (json.JSONDecodeError, KeyError):
-            # Not a JSON response or not a tool call format
-            return None
 
    def _build_completion_params(self, **override_params) -> Dict[str, Any]:
        """Build parameters for litellm completion calls with all necessary config"""
@@ -1895,11 +1839,33 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
        # Override with any provided parameters
        params.update(override_params)
 
+       # Handle structured output parameters
+       output_json = override_params.get('output_json')
+       output_pydantic = override_params.get('output_pydantic')
+
+       if output_json or output_pydantic:
+           # Always remove these from params as they're not native litellm parameters
+           params.pop('output_json', None)
+           params.pop('output_pydantic', None)
+
+           # Check if this is a Gemini model that supports native structured outputs
+           if self._is_gemini_model():
+               from .model_capabilities import supports_structured_outputs
+               schema_model = output_json or output_pydantic
+
+               if schema_model and hasattr(schema_model, 'model_json_schema') and supports_structured_outputs(self.model):
+                   schema = schema_model.model_json_schema()
+
+                   # Gemini uses response_mime_type and response_schema
+                   params['response_mime_type'] = 'application/json'
+                   params['response_schema'] = schema
+
+                   logging.debug(f"Using Gemini native structured output with schema: {json.dumps(schema, indent=2)}")
+
        # Add tool_choice="auto" when tools are provided (unless already specified)
        if 'tools' in params and params['tools'] and 'tool_choice' not in params:
            # For Gemini models, use tool_choice to encourage tool usage
-
-           if any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini']):
+           if self._is_gemini_model():
                try:
                    import litellm
                    # Check if model supports function calling before setting tool_choice