agno-2.3.2-py3-none-any.whl → agno-2.3.4-py3-none-any.whl
- agno/agent/agent.py +513 -185
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +66 -52
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +29 -27
- agno/db/sqlite/sqlite.py +30 -26
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +217 -4
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +18 -0
- agno/models/anthropic/claude.py +87 -81
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +25 -9
- agno/models/cerebras/cerebras_openai.py +22 -2
- agno/models/cohere/chat.py +18 -6
- agno/models/cometapi/cometapi.py +19 -1
- agno/models/deepinfra/deepinfra.py +19 -1
- agno/models/fireworks/fireworks.py +19 -1
- agno/models/google/gemini.py +583 -21
- agno/models/groq/groq.py +23 -6
- agno/models/huggingface/huggingface.py +22 -7
- agno/models/ibm/watsonx.py +21 -7
- agno/models/internlm/internlm.py +19 -1
- agno/models/langdb/langdb.py +10 -0
- agno/models/litellm/chat.py +17 -7
- agno/models/litellm/litellm_openai.py +19 -1
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +25 -5
- agno/models/meta/llama_openai.py +18 -0
- agno/models/mistral/mistral.py +13 -5
- agno/models/nvidia/nvidia.py +19 -1
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +22 -7
- agno/models/openai/responses.py +28 -10
- agno/models/openrouter/openrouter.py +20 -0
- agno/models/perplexity/perplexity.py +17 -0
- agno/models/requesty/requesty.py +18 -0
- agno/models/sambanova/sambanova.py +19 -1
- agno/models/siliconflow/siliconflow.py +19 -1
- agno/models/together/together.py +19 -1
- agno/models/vercel/v0.py +19 -1
- agno/models/vertexai/claude.py +99 -5
- agno/models/xai/xai.py +18 -0
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -0
- agno/os/routers/memory/memory.py +143 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +33 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +566 -219
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/base.py
CHANGED

```diff
@@ -312,6 +312,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
@@ -348,6 +349,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         while True:
             # Get response from model
             assistant_message = Message(role=self.assistant_message_role)
@@ -359,13 +362,14 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
 
             # Add assistant message to messages
             messages.append(assistant_message)
 
             # Log response and metrics
-            assistant_message.log(metrics=True)
+            assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Handle tool calls if present
             if assistant_message.tool_calls:
@@ -433,9 +437,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)
 
+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    compression_manager.compress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )
 
                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -447,7 +459,7 @@ class Model(ABC):
                 )
 
                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -499,6 +511,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
@@ -523,6 +536,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -536,6 +551,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
 
             # Add assistant message to messages
@@ -609,9 +625,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)
 
+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    await compression_manager.acompress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )
 
                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -623,7 +647,7 @@ class Model(ABC):
                 )
 
                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -675,6 +699,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -690,6 +715,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -730,6 +756,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -745,6 +772,7 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             assistant_message=assistant_message,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -855,6 +883,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -867,6 +896,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -887,6 +917,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
@@ -919,6 +950,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -936,6 +969,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             ):
                 if self.cache_response and isinstance(response, ModelResponse):
                     streaming_responses.append(response)
@@ -949,6 +983,8 @@ class Model(ABC):
                 response_format=response_format,
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
+                run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
             if self.cache_response:
                 streaming_responses.append(model_response)
@@ -980,18 +1016,31 @@ class Model(ABC):
             # Add a function call for each successful execution
             function_call_count += len(function_call_results)
 
+            all_messages = messages + function_call_results
+            # Compress tool results
+            if compression_manager and compression_manager.should_compress(all_messages):
+                compression_manager.compress(all_messages)
+
             # Format and add results to messages
             if stream_data and stream_data.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **stream_data.extra,
                 )
             elif model_response and model_response.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra,
                 )
             else:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
                 )
 
             # Handle function call media
@@ -1003,7 +1052,7 @@ class Model(ABC):
             )
 
             for function_call_result in function_call_results:
-                function_call_result.log(metrics=True)
+                function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Check if we should stop after tool calls
             if any(m.stop_after_tool_call for m in function_call_results):
@@ -1053,6 +1102,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -1064,6 +1114,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
        ):  # type: ignore
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -1084,6 +1135,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
@@ -1116,6 +1168,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -1133,6 +1187,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             ):
                 if self.cache_response and isinstance(model_response, ModelResponse):
                     streaming_responses.append(model_response)
@@ -1147,6 +1202,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
             if self.cache_response:
                 streaming_responses.append(model_response)
@@ -1178,18 +1234,31 @@ class Model(ABC):
             # Add a function call for each successful execution
             function_call_count += len(function_call_results)
 
+            all_messages = messages + function_call_results
+            # Compress tool results
+            if compression_manager and compression_manager.should_compress(all_messages):
+                await compression_manager.acompress(all_messages)
+
             # Format and add results to messages
             if stream_data and stream_data.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **stream_data.extra,
                 )
             elif model_response and model_response.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                )
             else:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
                 )
 
             # Handle function call media
@@ -1201,7 +1270,7 @@ class Model(ABC):
             )
 
             for function_call_result in function_call_results:
-                function_call_result.log(metrics=True)
+                function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Check if we should stop after tool calls
             if any(m.stop_after_tool_call for m in function_call_results):
@@ -1490,11 +1559,15 @@ class Model(ABC):
 
         # Run function calls sequentially
         function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+        stop_after_tool_call_from_exception = False
        try:
             function_execution_result = function_call.execute()
         except AgentRunException as a_exc:
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
         # Set function call success to False if an exception occurred
         except Exception as e:
             log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1583,6 +1656,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -2022,10 +2098,14 @@ class Model(ABC):
         updated_session_state = function_execution_result.updated_session_state
 
         # Handle AgentRunException
+        stop_after_tool_call_from_exception = False
         if isinstance(function_call_success, AgentRunException):
             a_exc = function_call_success
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
             function_call_success = False
 
@@ -2097,6 +2177,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -2146,7 +2229,11 @@ class Model(ABC):
         return function_calls_to_run
 
     def format_function_call_results(
-        self,
+        self,
+        messages: List[Message],
+        function_call_results: List[Message],
+        compress_tool_results: bool = False,
+        **kwargs,
     ) -> None:
         """
         Format function call results.
```
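Taken together, the base.py hunks treat the compression manager as a duck-typed dependency: the base `Model` only ever touches `compression_manager.compress_tool_results`, `should_compress(...)`, `compress(...)`, and `acompress(...)`. Below is a minimal sketch of that implied interface, written as a `typing.Protocol` purely for illustration; the concrete class ships in the new `agno/compression/manager.py`, which this diff does not expand.

```python
from typing import List, Protocol

from agno.models.message import Message


class CompressionManagerLike(Protocol):
    """The surface the base Model relies on in the hunks above.

    Illustrative only: the real implementation is agno's new
    agno/compression/manager.py, which is not shown in this diff.
    """

    # When True, providers serialize tool messages with their compressed
    # content and Message.log() is called with use_compressed_content=True.
    compress_tool_results: bool

    def should_compress(self, messages: List[Message]) -> bool:
        """Decide whether the accumulated messages warrant compression."""
        ...

    def compress(self, messages: List[Message]) -> None:
        """Compress tool results in place (sync code paths)."""
        ...

    async def acompress(self, messages: List[Message]) -> None:
        """Compress tool results in place (async code paths)."""
        ...
```

Note that `all_messages = messages + function_call_results` copies only the list, not the `Message` objects, so compressing `all_messages` mutates the same messages that `format_function_call_results(...)` then appends; providers therefore serialize the already-compressed tool content.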
agno/models/cerebras/cerebras.py
CHANGED

```diff
@@ -7,13 +7,14 @@ from typing import Any, Dict, Iterator, List, Optional, Type, Union
 import httpx
 from pydantic import BaseModel
 
+from agno.exceptions import ModelProviderError
 from agno.models.base import Model
 from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
 from agno.utils.http import get_default_async_client, get_default_sync_client
-from agno.utils.log import log_debug,
+from agno.utils.log import log_debug, log_warning
 
 try:
     from cerebras.cloud.sdk import AsyncCerebras as AsyncCerebrasClient
@@ -77,7 +78,11 @@ class Cerebras(Model):
         if not self.api_key:
             self.api_key = getenv("CEREBRAS_API_KEY")
             if not self.api_key:
-
+                raise ModelProviderError(
+                    message="CEREBRAS_API_KEY not set. Please set the CEREBRAS_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -212,6 +217,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Cerebras API.
@@ -228,7 +234,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -245,6 +251,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Cerebras API.
@@ -261,7 +268,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -278,6 +285,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Cerebras API.
@@ -295,7 +303,7 @@ class Cerebras(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         ):
@@ -311,6 +319,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Cerebras API.
@@ -328,7 +337,7 @@ class Cerebras(Model):
 
         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         )
@@ -338,20 +347,27 @@ class Cerebras(Model):
 
         assistant_message.metrics.stop_timer()
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
         }
 
         # Add name if present
@@ -380,7 +396,7 @@ class Cerebras(Model):
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content":
+                "content": content,
             }
 
         # Ensure no None values in the message
```

agno/models/cerebras/cerebras_openai.py
CHANGED

```diff
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Type, Union
 
 from pydantic import BaseModel
 
+from agno.exceptions import ModelProviderError
 from agno.models.message import Message
 from agno.models.openai.like import OpenAILike
 from agno.utils.log import log_debug
@@ -20,6 +21,23 @@ class CerebrasOpenAI(OpenAILike):
     base_url: str = "https://api.cerebras.ai/v1"
     api_key: Optional[str] = field(default_factory=lambda: getenv("CEREBRAS_API_KEY", None))
 
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for CEREBRAS_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("CEREBRAS_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="CEREBRAS_API_KEY not set. Please set the CEREBRAS_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
@@ -61,7 +79,7 @@ class CerebrasOpenAI(OpenAILike):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.
 
@@ -71,6 +89,7 @@ class CerebrasOpenAI(OpenAILike):
         Returns:
             Dict[str, Any]: The formatted message.
         """
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
@@ -100,10 +119,11 @@ class CerebrasOpenAI(OpenAILike):
 
         # Handle tool responses
         if message.role == "tool" and message.tool_call_id:
+            content = message.get_content(use_compressed_content=compress_tool_results)
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content":
+                "content": content if message.content is not None else "",
             }
 
         # Ensure no None values in the message
```
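The `_format_message` changes are the per-provider half of the feature: only `tool`-role messages swap in compressed content, while every other role is serialized unchanged. Here is a toy sketch of that dispatch using a stand-in message class; the real `Message.get_content(use_compressed_content=...)` lands in this release's `agno/models/message.py` changes, and the `compressed_content` field name below is a guess for illustration only.

```python
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class ToyMessage:
    """Stand-in for agno.models.message.Message; illustration only."""

    role: str
    content: Optional[str] = None
    compressed_content: Optional[str] = None  # hypothetical field name

    def get_content(self, use_compressed_content: bool = False) -> str:
        # Prefer the compressed form when requested and available.
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content or ""


def format_message(message: ToyMessage, compress_tool_results: bool = False) -> Dict[str, Any]:
    # Mirrors the Cerebras _format_message logic above: only tool
    # results are eligible for compressed content.
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content if message.content is not None else ""
    return {"role": message.role, "content": content}


tool_msg = ToyMessage(role="tool", content="...full tool output...", compressed_content="3-line summary")
assert format_message(tool_msg, compress_tool_results=True)["content"] == "3-line summary"
assert format_message(tool_msg)["content"] == "...full tool output..."
```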
agno/models/cohere/chat.py
CHANGED

```diff
@@ -65,7 +65,11 @@ class Cohere(Model):
 
         self.api_key = self.api_key or getenv("CO_API_KEY")
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="CO_API_KEY not set. Please set the CO_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params["api_key"] = self.api_key
 
@@ -92,7 +96,11 @@ class Cohere(Model):
         self.api_key = self.api_key or getenv("CO_API_KEY")
 
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="CO_API_KEY not set. Please set the CO_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params["api_key"] = self.api_key
 
@@ -181,6 +189,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Invoke a non-streamed chat response from the Cohere API.
@@ -194,7 +203,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )  # type: ignore
         assistant_message.metrics.stop_timer()
@@ -215,6 +224,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Invoke a streamed chat response from the Cohere API.
@@ -231,7 +241,7 @@ class Cohere(Model):
 
         for response in self.get_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
@@ -251,6 +261,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Asynchronously invoke a non-streamed chat response from the Cohere API.
@@ -264,7 +275,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )
         assistant_message.metrics.stop_timer()
@@ -285,6 +296,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Asynchronously invoke a streamed chat response from the Cohere API.
@@ -301,7 +313,7 @@ class Cohere(Model):
 
         async for response in self.get_async_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
```
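The Cohere hunks share a second theme with the Cerebras ones: a missing API key now raises `ModelProviderError` when client parameters are assembled, instead of surfacing as an opaque failure inside the provider SDK. A sketch of what that looks like from calling code, assuming `CO_API_KEY` is unset in the environment and that building the client (e.g. via `get_client()`) triggers the parameter check, as the hunks suggest:

```python
from agno.exceptions import ModelProviderError
from agno.models.cohere import Cohere

try:
    # No api_key argument and no CO_API_KEY in the environment:
    # the parameter check raises before any request is sent.
    Cohere().get_client()
except ModelProviderError as exc:
    print(exc)  # CO_API_KEY not set. Please set the CO_API_KEY environment variable.
```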