praisonaiagents 0.0.128__py3-none-any.whl → 0.0.130__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,7 @@
  import logging
  import os
  import warnings
+ import re
  from typing import Any, Dict, List, Optional, Union, Literal, Callable
  from pydantic import BaseModel
  import time
@@ -87,6 +88,10 @@ class LLM:
  "llama-3.2-90b-text-preview": 6144 # 8,192 actual
  }

+ # Ollama-specific prompt constants
+ OLLAMA_TOOL_USAGE_PROMPT = "Please analyze the request and use the available tools to help answer the question. Start by identifying what information you need."
+ OLLAMA_FINAL_ANSWER_PROMPT = "Based on the tool results above, please provide the final answer to the original question."
+
  def _log_llm_config(self, method_name: str, **config):
  """Centralized debug logging for LLM configuration and parameters.

@@ -277,15 +282,32 @@ class LLM:
  # Direct ollama/ prefix
  if self.model.startswith("ollama/"):
  return True
+
+ # Check base_url if provided
+ if self.base_url and "ollama" in self.base_url.lower():
+ return True

  # Check environment variables for Ollama base URL
  base_url = os.getenv("OPENAI_BASE_URL", "")
  api_base = os.getenv("OPENAI_API_BASE", "")

- # Common Ollama endpoints
- ollama_endpoints = ["localhost:11434", "127.0.0.1:11434", ":11434"]
+ # Common Ollama endpoints (including custom ports)
+ if any(url and ("ollama" in url.lower() or ":11434" in url)
+ for url in [base_url, api_base, self.base_url or ""]):
+ return True

- return any(endpoint in base_url or endpoint in api_base for endpoint in ollama_endpoints)
+ return False
+
+ def _format_ollama_tool_result_message(self, function_name: str, tool_result: Any) -> Dict[str, str]:
+ """
+ Format tool result message for Ollama provider.
+ Simplified approach without hardcoded regex extraction.
+ """
+ tool_result_str = str(tool_result)
+ return {
+ "role": "user",
+ "content": f"The {function_name} function returned: {tool_result_str}"
+ }

  def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
  """
@@ -422,13 +444,22 @@ class LLM:
  """
  messages = []

+ # Check if this is a Gemini model that supports native structured outputs
+ is_gemini_with_structured_output = False
+ if output_json or output_pydantic:
+ from .model_capabilities import supports_structured_outputs
+ is_gemini_with_structured_output = (
+ self._is_gemini_model() and
+ supports_structured_outputs(self.model)
+ )
+
  # Handle system prompt
  if system_prompt:
- # Append JSON schema if needed
- if output_json:
- system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_json.model_json_schema())}"
- elif output_pydantic:
- system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_pydantic.model_json_schema())}"
+ # Only append JSON schema for non-Gemini models or Gemini models without structured output support
+ if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+ schema_model = output_json or output_pydantic
+ if schema_model and hasattr(schema_model, 'model_json_schema'):
+ system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(schema_model.model_json_schema())}"

  # Skip system messages for legacy o1 models as they don't support them
  if not self._needs_system_message_skip():
@@ -440,7 +471,8 @@ class LLM:

  # Handle prompt modifications for JSON output
  original_prompt = prompt
- if output_json or output_pydantic:
+ if (output_json or output_pydantic) and not is_gemini_with_structured_output:
+ # Only modify prompt for non-Gemini models
  if isinstance(prompt, str):
  prompt = prompt + "\nReturn ONLY a valid JSON object. No other text or explanation."
  elif isinstance(prompt, list):
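
Editor's note: an illustrative sketch, not part of the diff, of what the structured-output path above implies for callers. The Pydantic model and the call shape are assumptions; only the prompt/schema handling mirrors the hunks above.

# Sketch only: a Pydantic model passed as output_pydantic.
from pydantic import BaseModel

class Answer(BaseModel):
    city: str
    population: int

# For Gemini models with native structured-output support, the schema is no longer
# appended to the system prompt or the user prompt; other models still receive the
# "Return ONLY a JSON object..." instructions built above.
# llm.get_response(prompt="Largest city in France?", output_pydantic=Answer)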
@@ -660,6 +692,7 @@ class LLM:

  start_time = time.time()
  reflection_count = 0
+ interaction_displayed = False # Track if interaction has been displayed

  # Display initial instruction once
  if verbose:
@@ -695,6 +728,8 @@ class LLM:
  temperature=temperature,
  stream=False, # force non-streaming
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -703,7 +738,7 @@ class LLM:
  final_response = resp

  # Optionally display reasoning if present
- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  original_prompt,
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -711,7 +746,8 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
- else:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  original_prompt,
  response_text,
@@ -719,6 +755,7 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
+ interaction_displayed = True

  # Otherwise do the existing streaming approach
  else:
@@ -741,6 +778,8 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -760,6 +799,8 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -772,7 +813,7 @@ class LLM:
  if formatted_tools and self._supports_streaming_tools():
  tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)

- response_text = response_text.strip() if response_text else "" if response_text else "" if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # Create a mock final_response with the captured data
  final_response = {
@@ -791,12 +832,14 @@ class LLM:
  tools=formatted_tools,
  temperature=temperature,
  stream=False,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  )
  response_text = final_response["choices"][0]["message"]["content"]

- if verbose:
+ if verbose and not interaction_displayed:
  # Display the complete response at once
  display_interaction(
  original_prompt,
@@ -805,18 +848,37 @@ class LLM:
  generation_time=time.time() - current_time,
  console=console
  )
+ interaction_displayed = True

  tool_calls = final_response["choices"][0]["message"].get("tool_calls")

+ # For Ollama, if response is empty but we have tools, prompt for tool usage
+ if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+ messages.append({
+ "role": "user",
+ "content": self.OLLAMA_TOOL_USAGE_PROMPT
+ })
+ iteration_count += 1
+ continue
+
  # Handle tool calls - Sequential tool calling logic
  if tool_calls and execute_tool_fn:
  # Convert tool_calls to a serializable format for all providers
  serializable_tool_calls = self._serialize_tool_calls(tool_calls)
- messages.append({
- "role": "assistant",
- "content": response_text,
- "tool_calls": serializable_tool_calls
- })
+ # Check if this is Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, only include role and content
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+ else:
+ # For other providers, include tool_calls
+ messages.append({
+ "role": "assistant",
+ "content": response_text,
+ "tool_calls": serializable_tool_calls
+ })

  should_continue = False
  tool_results = [] # Store all tool results
@@ -842,11 +904,17 @@ class LLM:
  logging.debug(f"[TOOL_EXEC_DEBUG] About to display tool call with message: {display_message}")
  display_tool_call(display_message, console=console)

- messages.append({
- "role": "tool",
- "tool_call_id": tool_call_id,
- "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
- })
+ # Check if this is Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, use user role and format as natural language
+ messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+ else:
+ # For other providers, use tool role with tool_call_id
+ messages.append({
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ })

  # Check if we should continue (for tools like sequential thinking)
  # This mimics the logic from agent.py lines 1004-1007
@@ -858,6 +926,14 @@ class LLM:
  iteration_count += 1
  continue

+ # For Ollama, add explicit prompt if we need a final answer
+ if self._is_ollama_provider() and iteration_count > 0:
+ # Add an explicit prompt for Ollama to generate the final answer
+ messages.append({
+ "role": "user",
+ "content": self.OLLAMA_FINAL_ANSWER_PROMPT
+ })
+
  # After tool execution, continue the loop to check if more tools are needed
  # instead of immediately trying to get a final response
  iteration_count += 1
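
Editor's note: an illustrative sketch, not part of the diff, of the message sequence the Ollama branch above builds after a single tool call. The user question and the tool output are invented for illustration.

# Sketch only: messages after one tool iteration on an Ollama provider.
messages = [
    {"role": "user", "content": "What is the weather in Paris?"},
    {"role": "assistant", "content": ""},                          # no "tool_calls" key for Ollama
    {"role": "user", "content": "The get_weather function returned: {'temp_c': 21}"},
    {"role": "user", "content": LLM.OLLAMA_FINAL_ANSWER_PROMPT},   # appended once iteration_count > 0
]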
@@ -878,7 +954,7 @@ class LLM:
  return final_response_text

  # No tool calls were made in this iteration, return the response
- if verbose:
+ if verbose and not interaction_displayed:
  # If we have stored reasoning content from tool execution, display it
  if stored_reasoning_content:
  display_interaction(
@@ -896,6 +972,7 @@ class LLM:
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True

  response_text = response_text.strip() if response_text else ""

@@ -907,15 +984,17 @@ class LLM:
  if output_json or output_pydantic:
  self.chat_history.append({"role": "user", "content": original_prompt})
  self.chat_history.append({"role": "assistant", "content": response_text})
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if not self_reflect:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  # Return reasoning content if reasoning_steps is True
  if reasoning_steps and stored_reasoning_content:
  return stored_reasoning_content
@@ -944,6 +1023,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # Force non-streaming
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -979,6 +1060,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -994,6 +1077,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1011,15 +1096,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  )

  if satisfactory and reflection_count >= min_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if reflection_count >= max_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  reflection_count += 1
@@ -1039,6 +1126,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1053,21 +1142,24 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=True,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
  if chunk and chunk.choices and chunk.choices[0].delta.content:
  response_text += chunk.choices[0].delta.content

- response_text = response_text.strip() if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""
  continue

  except json.JSONDecodeError:
  reflection_count += 1
  if reflection_count >= max_reflect:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text
  continue
  except Exception as e:
@@ -1075,9 +1167,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  return None

  # If we've exhausted reflection attempts
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  except Exception as error:
@@ -1089,6 +1182,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  total_time = time.time() - start_time
  logging.debug(f"get_response completed in {total_time:.2f} seconds")

+ def _is_gemini_model(self) -> bool:
+ """Check if the model is a Gemini model."""
+ if not self.model:
+ return False
+ return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
  async def get_response_async(
  self,
  prompt: Union[str, List[Dict]],
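
Editor's note: an illustrative sketch, not part of the diff, of the new _is_gemini_model() check. It is a case-insensitive substring match, so the 'gemini' pattern already subsumes the two longer prefixes.

# Sketch only: all of these names are detected as Gemini models.
for name in ("gemini/gemini-2.0-flash", "google/gemini-1.5-pro", "GEMINI-PRO"):
    assert any(prefix in name.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])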
@@ -1177,6 +1276,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.

  start_time = time.time()
  reflection_count = 0
+ interaction_displayed = False # Track if interaction has been displayed

  # Format tools for LiteLLM using the shared helper
  formatted_tools = self._format_tools_for_litellm(tools)
@@ -1197,15 +1297,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  resp = await litellm.acompletion(
  **self._build_completion_params(
  messages=messages,
- temperature=temperature,
- stream=False, # force non-streaming
- **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
- )
+ temperature=temperature,
+ stream=False, # force non-streaming
+ output_json=output_json,
+ output_pydantic=output_pydantic,
+ **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+ )
  )
  reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
  response_text = resp["choices"][0]["message"]["content"]

- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  "Initial reasoning:",
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1213,7 +1315,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
- elif verbose:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  "Initial response:",
  response_text,
@@ -1221,6 +1324,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True
  else:
  # Determine if we should use streaming based on tool support
  use_streaming = stream
@@ -1239,6 +1343,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=True,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1259,6 +1365,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=True,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **kwargs
  )
  ):
@@ -1271,7 +1379,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  if formatted_tools and self._supports_streaming_tools():
  tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)

- response_text = response_text.strip() if response_text else "" if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # We already have tool_calls from streaming if supported
  # No need for a second API call!
@@ -1283,13 +1391,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
  response_text = tool_response.choices[0].message.get("content", "")
  tool_calls = tool_response.choices[0].message.get("tool_calls", [])

- if verbose:
+ if verbose and not interaction_displayed:
  # Display the complete response at once
  display_interaction(
  original_prompt,
@@ -1298,16 +1408,35 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True

+ # For Ollama, if response is empty but we have tools, prompt for tool usage
+ if self._is_ollama_provider() and (not response_text or response_text.strip() == "") and formatted_tools and iteration_count == 0:
+ messages.append({
+ "role": "user",
+ "content": self.OLLAMA_TOOL_USAGE_PROMPT
+ })
+ iteration_count += 1
+ continue
+
  # Now handle tools if we have them (either from streaming or non-streaming)
  if tools and execute_tool_fn and tool_calls:
  # Convert tool_calls to a serializable format for all providers
  serializable_tool_calls = self._serialize_tool_calls(tool_calls)
- messages.append({
- "role": "assistant",
- "content": response_text,
- "tool_calls": serializable_tool_calls
- })
+ # Check if it's Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, only include role and content
+ messages.append({
+ "role": "assistant",
+ "content": response_text
+ })
+ else:
+ # For other providers, include tool_calls
+ messages.append({
+ "role": "assistant",
+ "content": response_text,
+ "tool_calls": serializable_tool_calls
+ })

  tool_results = [] # Store all tool results
  for tool_call in tool_calls:
@@ -1325,12 +1454,26 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  else:
  display_message += "Function returned no output"
  display_tool_call(display_message, console=console)
+ # Check if it's Ollama provider
+ if self._is_ollama_provider():
+ # For Ollama, use user role and format as natural language
+ messages.append(self._format_ollama_tool_result_message(function_name, tool_result))
+ else:
+ # For other providers, use tool role with tool_call_id
+ messages.append({
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ })
+
+ # For Ollama, add explicit prompt if we need a final answer
+ if self._is_ollama_provider() and iteration_count > 0:
+ # Add an explicit prompt for Ollama to generate the final answer
  messages.append({
- "role": "tool",
- "tool_call_id": tool_call_id,
- "content": json.dumps(tool_result) if tool_result is not None else "Function returned an empty output"
+ "role": "user",
+ "content": self.OLLAMA_FINAL_ANSWER_PROMPT
  })
-
+
  # Get response after tool calls
  response_text = ""

@@ -1343,13 +1486,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # force non-streaming
  tools=formatted_tools, # Include tools
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
  reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
  response_text = resp["choices"][0]["message"]["content"]

- if verbose and reasoning_content:
+ if verbose and reasoning_content and not interaction_displayed:
  display_interaction(
  "Tool response reasoning:",
  f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
@@ -1357,7 +1502,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
- elif verbose:
+ interaction_displayed = True
+ elif verbose and not interaction_displayed:
  display_interaction(
  "Tool response:",
  response_text,
@@ -1365,6 +1511,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  generation_time=time.time() - start_time,
  console=console
  )
+ interaction_displayed = True
  else:
  # Get response after tool calls with streaming if not already handled
  if verbose:
@@ -1374,6 +1521,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  tools=formatted_tools,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1389,13 +1538,15 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  messages=messages,
  temperature=temperature,
  stream=stream,
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
  if chunk and chunk.choices and chunk.choices[0].delta.content:
  response_text += chunk.choices[0].delta.content

- response_text = response_text.strip() if response_text else "" if response_text else ""
+ response_text = response_text.strip() if response_text else ""

  # After tool execution, update messages and continue the loop
  if response_text:
@@ -1422,9 +1573,10 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  if output_json or output_pydantic:
  self.chat_history.append({"role": "user", "content": original_prompt})
  self.chat_history.append({"role": "assistant", "content": response_text})
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(original_prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if not self_reflect:
@@ -1432,7 +1584,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  display_text = final_response_text if final_response_text else response_text

  # Display with stored reasoning content if available
- if verbose:
+ if verbose and not interaction_displayed:
  if stored_reasoning_content:
  display_interaction(
  original_prompt,
@@ -1444,6 +1596,7 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  else:
  display_interaction(original_prompt, display_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True

  # Return reasoning content if reasoning_steps is True and we have it
  if reasoning_steps and stored_reasoning_content:
@@ -1471,6 +1624,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=False, # Force non-streaming
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  )
@@ -1506,6 +1661,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1521,6 +1678,8 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  temperature=temperature,
  stream=stream,
  response_format={"type": "json_object"},
+ output_json=output_json,
+ output_pydantic=output_pydantic,
  **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
  )
  ):
@@ -1539,15 +1698,17 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  )

  if satisfactory and reflection_count >= min_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  if reflection_count >= max_reflect - 1:
- if verbose:
+ if verbose and not interaction_displayed:
  display_interaction(prompt, response_text, markdown=markdown,
  generation_time=time.time() - start_time, console=console)
+ interaction_displayed = True
  return response_text

  reflection_count += 1
@@ -1678,11 +1839,33 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
  # Override with any provided parameters
  params.update(override_params)

+ # Handle structured output parameters
+ output_json = override_params.get('output_json')
+ output_pydantic = override_params.get('output_pydantic')
+
+ if output_json or output_pydantic:
+ # Always remove these from params as they're not native litellm parameters
+ params.pop('output_json', None)
+ params.pop('output_pydantic', None)
+
+ # Check if this is a Gemini model that supports native structured outputs
+ if self._is_gemini_model():
+ from .model_capabilities import supports_structured_outputs
+ schema_model = output_json or output_pydantic
+
+ if schema_model and hasattr(schema_model, 'model_json_schema') and supports_structured_outputs(self.model):
+ schema = schema_model.model_json_schema()
+
+ # Gemini uses response_mime_type and response_schema
+ params['response_mime_type'] = 'application/json'
+ params['response_schema'] = schema
+
+ logging.debug(f"Using Gemini native structured output with schema: {json.dumps(schema, indent=2)}")
+
  # Add tool_choice="auto" when tools are provided (unless already specified)
  if 'tools' in params and params['tools'] and 'tool_choice' not in params:
  # For Gemini models, use tool_choice to encourage tool usage
- # More comprehensive Gemini model detection
- if any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini']):
+ if self._is_gemini_model():
  try:
  import litellm
  # Check if model supports function calling before setting tool_choice
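
Editor's note: an illustrative sketch, not part of the diff, of the completion parameters _build_completion_params now produces when a structured-output-capable Gemini model is used. The model name, prompt, and the Answer model from the earlier sketch are assumptions.

# Sketch only: output_json/output_pydantic are stripped and replaced by
# Gemini's native structured-output parameters, as in the hunk above.
params = {
    "model": "gemini/gemini-2.0-flash",
    "messages": [{"role": "user", "content": "Largest city in France?"}],
    "response_mime_type": "application/json",
    "response_schema": Answer.model_json_schema(),
}
# litellm.completion(**params) is then expected to return JSON matching the schema.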