agno 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. agno/agent/agent.py +514 -186
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +61 -51
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/schemas/memory.py +7 -5
  18. agno/db/singlestore/singlestore.py +99 -108
  19. agno/db/sqlite/async_sqlite.py +32 -30
  20. agno/db/sqlite/sqlite.py +34 -30
  21. agno/knowledge/reader/pdf_reader.py +2 -2
  22. agno/knowledge/reader/tavily_reader.py +0 -1
  23. agno/memory/__init__.py +14 -1
  24. agno/memory/manager.py +223 -8
  25. agno/memory/strategies/__init__.py +15 -0
  26. agno/memory/strategies/base.py +67 -0
  27. agno/memory/strategies/summarize.py +196 -0
  28. agno/memory/strategies/types.py +37 -0
  29. agno/models/anthropic/claude.py +84 -80
  30. agno/models/aws/bedrock.py +38 -16
  31. agno/models/aws/claude.py +97 -277
  32. agno/models/azure/ai_foundry.py +8 -4
  33. agno/models/base.py +101 -14
  34. agno/models/cerebras/cerebras.py +18 -7
  35. agno/models/cerebras/cerebras_openai.py +4 -2
  36. agno/models/cohere/chat.py +8 -4
  37. agno/models/google/gemini.py +578 -20
  38. agno/models/groq/groq.py +18 -5
  39. agno/models/huggingface/huggingface.py +17 -6
  40. agno/models/ibm/watsonx.py +16 -6
  41. agno/models/litellm/chat.py +17 -7
  42. agno/models/message.py +19 -5
  43. agno/models/meta/llama.py +20 -4
  44. agno/models/mistral/mistral.py +8 -4
  45. agno/models/ollama/chat.py +17 -6
  46. agno/models/openai/chat.py +17 -6
  47. agno/models/openai/responses.py +23 -9
  48. agno/models/vertexai/claude.py +99 -5
  49. agno/os/interfaces/agui/router.py +1 -0
  50. agno/os/interfaces/agui/utils.py +97 -57
  51. agno/os/router.py +16 -1
  52. agno/os/routers/memory/memory.py +146 -0
  53. agno/os/routers/memory/schemas.py +26 -0
  54. agno/os/schema.py +21 -6
  55. agno/os/utils.py +134 -10
  56. agno/run/base.py +2 -1
  57. agno/run/workflow.py +1 -1
  58. agno/team/team.py +571 -225
  59. agno/tools/mcp/mcp.py +1 -1
  60. agno/utils/agent.py +119 -1
  61. agno/utils/dttm.py +33 -0
  62. agno/utils/models/ai_foundry.py +9 -2
  63. agno/utils/models/claude.py +12 -5
  64. agno/utils/models/cohere.py +9 -2
  65. agno/utils/models/llama.py +9 -2
  66. agno/utils/models/mistral.py +4 -2
  67. agno/utils/print_response/agent.py +37 -2
  68. agno/utils/print_response/team.py +52 -0
  69. agno/utils/tokens.py +41 -0
  70. agno/workflow/types.py +2 -2
  71. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
  72. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/RECORD +75 -68
  73. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
  74. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
  75. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -312,6 +312,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
@@ -348,6 +349,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         while True:
             # Get response from model
             assistant_message = Message(role=self.assistant_message_role)
@@ -359,13 +362,14 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )

             # Add assistant message to messages
             messages.append(assistant_message)

             # Log response and metrics
-            assistant_message.log(metrics=True)
+            assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)

             # Handle tool calls if present
             if assistant_message.tool_calls:
@@ -433,9 +437,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)

+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    compression_manager.compress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )

                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -447,7 +459,7 @@ class Model(ABC):
                     )

                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
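Across all four variants of this loop (sync/async, streaming and non-streaming), `compression_manager` is used only through a small duck-typed surface: a `compress_tool_results` flag, a `should_compress(messages)` predicate, and `compress`/`acompress` calls whose return values are ignored, which implies they annotate the `Message` objects in place. A minimal sketch of a manager satisfying that surface; the threshold, the truncation stand-in, and the `compressed_content` attribute are assumptions, not taken from this diff (the real implementation lives in the new `agno/compression/manager.py` listed above):

```python
from typing import List

from agno.models.message import Message


class SketchCompressionManager:
    """Duck-typed stand-in for the manager Model.response() expects above."""

    def __init__(self, compress_tool_results: bool = True, max_tool_chars: int = 8000):
        self.compress_tool_results = compress_tool_results
        self.max_tool_chars = max_tool_chars  # assumed threshold, not from the diff

    def should_compress(self, messages: List[Message]) -> bool:
        # Trigger once accumulated tool output grows past the threshold.
        total = sum(len(str(m.content)) for m in messages if m.role == "tool" and m.content)
        return self.compress_tool_results and total > self.max_tool_chars

    def compress(self, messages: List[Message]) -> None:
        # The caller ignores the return value, so annotate messages in place;
        # compressed_content is the assumed field that get_content() reads back.
        for m in messages:
            if m.role == "tool" and m.content:
                m.compressed_content = str(m.content)[:500]  # stand-in for a real summarizer

    async def acompress(self, messages: List[Message]) -> None:
        # Async twin called from aresponse(); a real manager might call a model here.
        self.compress(messages)
```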
@@ -499,6 +511,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
@@ -523,6 +536,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0

         while True:
@@ -536,6 +551,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )

             # Add assistant message to messages
@@ -609,9 +625,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)

+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    await compression_manager.acompress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )

                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -623,7 +647,7 @@ class Model(ABC):
                     )

                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -675,6 +699,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -690,6 +715,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )

         # Populate the assistant message
@@ -730,6 +756,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -745,6 +772,7 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             assistant_message=assistant_message,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )

         # Populate the assistant message
@@ -855,6 +883,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -867,6 +896,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -887,6 +917,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
@@ -919,6 +950,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0

         while True:
@@ -936,6 +969,7 @@ class Model(ABC):
                     tools=_tool_dicts,
                     tool_choice=tool_choice or self._tool_choice,
                     run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 ):
                     if self.cache_response and isinstance(response, ModelResponse):
                         streaming_responses.append(response)
@@ -949,6 +983,8 @@ class Model(ABC):
                     response_format=response_format,
                     tools=_tool_dicts,
                     tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
                 if self.cache_response:
                     streaming_responses.append(model_response)
@@ -980,18 +1016,31 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)

+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    compression_manager.compress(all_messages)
+
                 # Format and add results to messages
                 if stream_data and stream_data.extra is not None:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
+                        **stream_data.extra,
                     )
                 elif model_response and model_response.extra is not None:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **model_response.extra
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra,
                     )
                 else:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
                     )

                 # Handle function call media
@@ -1003,7 +1052,7 @@ class Model(ABC):
                     )

                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -1053,6 +1102,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
    ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -1064,6 +1114,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):  # type: ignore
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -1084,6 +1135,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
@@ -1116,6 +1168,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0

         while True:
@@ -1133,6 +1187,7 @@ class Model(ABC):
                     tools=_tool_dicts,
                     tool_choice=tool_choice or self._tool_choice,
                     run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 ):
                     if self.cache_response and isinstance(model_response, ModelResponse):
                         streaming_responses.append(model_response)
@@ -1147,6 +1202,7 @@ class Model(ABC):
                     tools=_tool_dicts,
                     tool_choice=tool_choice or self._tool_choice,
                     run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
                 if self.cache_response:
                     streaming_responses.append(model_response)
@@ -1178,18 +1234,31 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)

+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    await compression_manager.acompress(all_messages)
+
                 # Format and add results to messages
                 if stream_data and stream_data.extra is not None:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
+                        **stream_data.extra,
                     )
                 elif model_response and model_response.extra is not None:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
                 else:
                     self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results
+                        messages=messages,
+                        function_call_results=function_call_results,
+                        compress_tool_results=_compress_tool_results,
                     )

                 # Handle function call media
@@ -1201,7 +1270,7 @@ class Model(ABC):
                     )

                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -1490,11 +1559,15 @@ class Model(ABC):

         # Run function calls sequentially
         function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+        stop_after_tool_call_from_exception = False
         try:
             function_execution_result = function_call.execute()
         except AgentRunException as a_exc:
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
         # Set function call success to False if an exception occurred
         except Exception as e:
             log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1583,6 +1656,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
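This pair of hunks closes a gap: a tool that raises `AgentRunException` with `stop_execution` set now forces `stop_after_tool_call = True` on its result message, which the `any(m.stop_after_tool_call ...)` checks in the response loops above then honor. A hedged sketch of a tool using this path, assuming `StopAgentRun` from `agno.exceptions` is the `AgentRunException` subclass that sets `stop_execution=True`:

```python
from agno.exceptions import StopAgentRun


def cancel_subscription(subscription_id: str) -> str:
    """Tool that halts the agent run once its work is done."""
    # ... perform the cancellation ...
    # With this release, stop_execution propagates to the result message as
    # stop_after_tool_call=True, so the model loop exits instead of continuing.
    raise StopAgentRun(f"Subscription {subscription_id} cancelled; no further steps needed.")
```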
@@ -2022,10 +2098,14 @@ class Model(ABC):
             updated_session_state = function_execution_result.updated_session_state

         # Handle AgentRunException
+        stop_after_tool_call_from_exception = False
         if isinstance(function_call_success, AgentRunException):
             a_exc = function_call_success
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
             function_call_success = False

@@ -2097,6 +2177,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -2146,7 +2229,11 @@ class Model(ABC):
         return function_calls_to_run

     def format_function_call_results(
-        self, messages: List[Message], function_call_results: List[Message], **kwargs
+        self,
+        messages: List[Message],
+        function_call_results: List[Message],
+        compress_tool_results: bool = False,
+        **kwargs,
     ) -> None:
         """
         Format function call results.
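Every provider override of `format_function_call_results` now receives `compress_tool_results`, so the provider-specific hunks that follow all thread the flag down to their message formatters. A sketch of the pattern a subclass follows; only the signature comes from this diff, the body is illustrative:

```python
from typing import List

from agno.models.base import Model
from agno.models.message import Message


class SketchProvider(Model):  # hypothetical subclass for illustration only
    def format_function_call_results(
        self,
        messages: List[Message],
        function_call_results: List[Message],
        compress_tool_results: bool = False,
        **kwargs,
    ) -> None:
        for result in function_call_results:
            # get_content(use_compressed_content=...) is the accessor the
            # provider diffs below use to read back compressed tool output.
            messages.append(
                Message(
                    role="tool",
                    tool_call_id=result.tool_call_id,
                    content=result.get_content(use_compressed_content=compress_tool_results),
                )
            )
```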
agno/models/cerebras/cerebras.py CHANGED
@@ -212,6 +212,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Cerebras API.
@@ -228,7 +229,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -245,6 +246,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Cerebras API.
@@ -261,7 +263,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -278,6 +280,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Cerebras API.
@@ -295,7 +298,7 @@ class Cerebras(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         ):
@@ -311,6 +314,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Cerebras API.
@@ -328,7 +332,7 @@ class Cerebras(Model):

         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         )
@@ -338,20 +342,27 @@ class Cerebras(Model):

         assistant_message.metrics.stop_timer()

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content if message.content is not None else "",
+            "content": content,
         }

         # Add name if present
@@ -380,7 +391,7 @@ class Cerebras(Model):
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content": message.content if message.content is not None else "",
+                "content": content,
             }

         # Ensure no None values in the message
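`_format_message` now routes tool messages through `Message.get_content(use_compressed_content=...)` instead of reading `message.content` directly. That accessor is not shown in this section of the diff (the `agno/models/message.py` change listed above is where it lands); a plausible reading of its semantics, consistent with a manager that only annotates messages in place:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class MessageSketch:
    """Stand-in for agno.models.message.Message; only the accessor matters here."""

    role: str
    content: Optional[str] = None
    compressed_content: Optional[str] = None  # assumed field written by compress()

    def get_content(self, use_compressed_content: bool = False) -> str:
        # Prefer the compressed summary when asked for it and it exists;
        # otherwise fall back to the raw content.
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content if self.content is not None else ""


# A large tool payload collapses to its summary once compression has run:
msg = MessageSketch(role="tool", content="{...large JSON...}", compressed_content="3 rows matched")
assert msg.get_content(use_compressed_content=True) == "3 rows matched"
assert msg.get_content() == "{...large JSON...}"
```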
agno/models/cerebras/cerebras_openai.py CHANGED
@@ -61,7 +61,7 @@ class CerebrasOpenAI(OpenAILike):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.

@@ -71,6 +71,7 @@ class CerebrasOpenAI(OpenAILike):
         Returns:
             Dict[str, Any]: The formatted message.
         """
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
@@ -100,10 +101,11 @@ class CerebrasOpenAI(OpenAILike):

         # Handle tool responses
         if message.role == "tool" and message.tool_call_id:
+            content = message.get_content(use_compressed_content=compress_tool_results)
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content": message.content if message.content is not None else "",
+                "content": content if message.content is not None else "",
             }

         # Ensure no None values in the message
agno/models/cohere/chat.py CHANGED
@@ -181,6 +181,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Invoke a non-streamed chat response from the Cohere API.
@@ -194,7 +195,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )  # type: ignore
         assistant_message.metrics.stop_timer()
@@ -215,6 +216,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Invoke a streamed chat response from the Cohere API.
@@ -231,7 +233,7 @@ class Cohere(Model):

         for response in self.get_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
@@ -251,6 +253,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Asynchronously invoke a non-streamed chat response from the Cohere API.
@@ -264,7 +267,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )
         assistant_message.metrics.stop_timer()
@@ -285,6 +288,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Asynchronously invoke a streamed chat response from the Cohere API.
@@ -301,7 +305,7 @@ class Cohere(Model):

         async for response in self.get_async_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
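Cohere delegates message formatting to a module-level `format_messages` helper, so here the flag is passed positionally rather than through a `_format_message` method. Putting the pieces together, a minimal driver for the new parameter; `SketchCompressionManager` is the hypothetical manager sketched under the `base.py` hunks above, and only `compression_manager=` itself is guaranteed by the signatures shown in this diff:

```python
from agno.models.message import Message
from agno.models.openai import OpenAIChat

# Any object with the duck-typed surface from the base.py sketch works here.
model = OpenAIChat(id="gpt-4o-mini")
manager = SketchCompressionManager(compress_tool_results=True, max_tool_chars=4000)

response = model.response(
    messages=[Message(role="user", content="Fetch and summarize the report.")],
    compression_manager=manager,  # new in this release
)
print(response.content)
```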