agno 2.2.5__py3-none-any.whl → 2.2.7__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- agno/agent/agent.py +500 -423
- agno/api/os.py +1 -1
- agno/culture/manager.py +12 -8
- agno/guardrails/prompt_injection.py +1 -0
- agno/knowledge/chunking/agentic.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +37 -5
- agno/memory/manager.py +9 -4
- agno/models/anthropic/claude.py +1 -2
- agno/models/azure/ai_foundry.py +31 -14
- agno/models/azure/openai_chat.py +12 -4
- agno/models/base.py +106 -65
- agno/models/cerebras/cerebras.py +11 -6
- agno/models/groq/groq.py +7 -4
- agno/models/meta/llama.py +12 -6
- agno/models/meta/llama_openai.py +5 -1
- agno/models/openai/chat.py +26 -17
- agno/models/openai/responses.py +11 -63
- agno/models/requesty/requesty.py +5 -2
- agno/models/utils.py +254 -8
- agno/models/vertexai/claude.py +9 -13
- agno/os/app.py +13 -12
- agno/os/routers/evals/evals.py +8 -8
- agno/os/routers/evals/utils.py +1 -0
- agno/os/schema.py +56 -38
- agno/os/utils.py +27 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +5 -0
- agno/run/base.py +18 -1
- agno/run/team.py +13 -9
- agno/run/workflow.py +39 -0
- agno/session/summary.py +8 -2
- agno/session/workflow.py +4 -3
- agno/team/team.py +302 -369
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -25
- agno/tools/function.py +98 -17
- agno/tools/mcp/mcp.py +8 -1
- agno/tools/notion.py +204 -0
- agno/utils/agent.py +78 -0
- agno/utils/events.py +2 -0
- agno/utils/hooks.py +1 -1
- agno/utils/models/claude.py +25 -8
- agno/utils/print_response/workflow.py +115 -16
- agno/vectordb/__init__.py +2 -1
- agno/vectordb/milvus/milvus.py +5 -0
- agno/vectordb/redis/__init__.py +5 -0
- agno/vectordb/redis/redisdb.py +687 -0
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/step.py +13 -2
- agno/workflow/workflow.py +969 -72
- {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/METADATA +10 -3
- {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/RECORD +57 -52
- {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/WHEEL +0 -0
- {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/top_level.txt +0 -0
agno/models/base.py
CHANGED
@@ -31,7 +31,8 @@ from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
 from agno.run.team import RunContentEvent as TeamRunContentEvent
-from agno.run.team import TeamRunOutputEvent
+from agno.run.team import TeamRunOutput, TeamRunOutputEvent
+from agno.run.workflow import WorkflowRunOutputEvent
 from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
 from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.utils.timer import Timer
@@ -52,6 +53,8 @@ class MessageData:
     response_video: Optional[Video] = None
     response_file: Optional[File] = None
 
+    response_metrics: Optional[Metrics] = None
+
     # Data from the provider that we might need on subsequent messages
     response_provider_data: Optional[Dict[str, Any]] = None
 
@@ -307,7 +310,7 @@ class Model(ABC):
         tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
@@ -481,6 +484,7 @@
         tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
@@ -516,6 +520,7 @@
             response_format=response_format,
             tools=_tool_dicts,
             tool_choice=tool_choice or self._tool_choice,
+            run_response=run_response,
         )
 
         # Add assistant message to messages
@@ -643,7 +648,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -696,7 +701,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -756,7 +761,6 @@
         Returns:
             Message: The populated assistant message
         """
-        # Add role to assistant message
         if provider_response.role is not None:
             assistant_message.role = provider_response.role
 
@@ -820,7 +824,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -834,14 +838,14 @@
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):
-            …
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
-            )
+            ):
+                yield model_response_delta
 
-        # …
-        self.…
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     def response_stream(
         self,
@@ -851,7 +855,7 @@
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
@@ -905,22 +909,6 @@
                 streaming_responses.append(response)
                 yield response
 
-            # Populate assistant message from stream data
-            if stream_data.response_content:
-                assistant_message.content = stream_data.response_content
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_citations:
-                assistant_message.citations = stream_data.response_citations
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
         else:
             self._process_model_response(
                 messages=messages,
@@ -1019,7 +1007,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -1032,15 +1020,14 @@
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):  # type: ignore
-            for …
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
             ):
-                yield …
+                yield model_response_delta
 
-        # Populate the …
-        self.…
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     async def aresponse_stream(
         self,
@@ -1050,7 +1037,7 @@
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
@@ -1104,20 +1091,6 @@
                 streaming_responses.append(model_response)
                 yield model_response
 
-            # Populate assistant message from stream data
-            if stream_data.response_content:
-                assistant_message.content = stream_data.response_content
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
         else:
             await self._aprocess_model_response(
                 messages=messages,
@@ -1209,15 +1182,51 @@
         if self.cache_response and cache_key and streaming_responses:
             self._save_streaming_responses_to_cache(cache_key, streaming_responses)
 
-    def …
-        self, …
+    def _populate_assistant_message_from_stream_data(
+        self, assistant_message: Message, stream_data: MessageData
+    ) -> None:
+        """
+        Populate an assistant message with the stream data.
+        """
+        if stream_data.response_role is not None:
+            assistant_message.role = stream_data.response_role
+        if stream_data.response_metrics is not None:
+            assistant_message.metrics = stream_data.response_metrics
+        if stream_data.response_content:
+            assistant_message.content = stream_data.response_content
+        if stream_data.response_reasoning_content:
+            assistant_message.reasoning_content = stream_data.response_reasoning_content
+        if stream_data.response_redacted_reasoning_content:
+            assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+        if stream_data.response_provider_data:
+            assistant_message.provider_data = stream_data.response_provider_data
+        if stream_data.response_citations:
+            assistant_message.citations = stream_data.response_citations
+        if stream_data.response_audio:
+            assistant_message.audio_output = stream_data.response_audio
+        if stream_data.response_image:
+            assistant_message.image_output = stream_data.response_image
+        if stream_data.response_video:
+            assistant_message.video_output = stream_data.response_video
+        if stream_data.response_file:
+            assistant_message.file_output = stream_data.response_file
+        if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+            assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+
+    def _populate_stream_data(
+        self, stream_data: MessageData, model_response_delta: ModelResponse
     ) -> Iterator[ModelResponse]:
         """Update the stream data and assistant message with the model response."""
-        # Add role to assistant message
-        if model_response_delta.role is not None:
-            assistant_message.role = model_response_delta.role
 
         should_yield = False
+        if model_response_delta.role is not None:
+            stream_data.response_role = model_response_delta.role  # type: ignore
+
+        if model_response_delta.response_usage is not None:
+            if stream_data.response_metrics is None:
+                stream_data.response_metrics = Metrics()
+            stream_data.response_metrics += model_response_delta.response_usage
+
         # Update stream_data content
         if model_response_delta.content is not None:
             stream_data.response_content += model_response_delta.content
@@ -1440,11 +1449,13 @@
 
         if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
             for item in function_execution_result.result:
-                # This function yields agent/team run events
-                if …
-                    item, tuple(get_args(…
+                # This function yields agent/team/workflow run events
+                if (
+                    isinstance(item, tuple(get_args(RunOutputEvent)))
+                    or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+                    or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
                 ):
-                    # We only capture content events
+                    # We only capture content events for output accumulation
                     if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
                         if item.content is not None and isinstance(item.content, BaseModel):
                             function_call_output += item.content.model_dump_json()
@@ -1458,6 +1469,16 @@
                     if isinstance(item, CustomEvent):
                         function_call_output += str(item)
 
+                    # For WorkflowCompletedEvent, extract content for final output
+                    from agno.run.workflow import WorkflowCompletedEvent
+
+                    if isinstance(item, WorkflowCompletedEvent):
+                        if item.content is not None:
+                            if isinstance(item.content, BaseModel):
+                                function_call_output += item.content.model_dump_json()
+                            else:
+                                function_call_output += str(item.content)
+
                 # Yield the event itself to bubble it up
                 yield item
 
@@ -1829,9 +1850,12 @@
 
                 try:
                     async for item in function_call.result:
-                        # This function yields agent/team run events
-                        if isinstance(
-                            item, …
+                        # This function yields agent/team/workflow run events
+                        if isinstance(
+                            item,
+                            tuple(get_args(RunOutputEvent))
+                            + tuple(get_args(TeamRunOutputEvent))
+                            + tuple(get_args(WorkflowRunOutputEvent)),
                         ):
                             # We only capture content events
                             if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1848,6 +1872,16 @@
                             if isinstance(item, CustomEvent):
                                 function_call_output += str(item)
 
+                            # For WorkflowCompletedEvent, extract content for final output
+                            from agno.run.workflow import WorkflowCompletedEvent
+
+                            if isinstance(item, WorkflowCompletedEvent):
+                                if item.content is not None:
+                                    if isinstance(item.content, BaseModel):
+                                        function_call_output += item.content.model_dump_json()
+                                    else:
+                                        function_call_output += str(item.content)
+
                             # Put the event into the queue to be yielded
                             await event_queue.put(item)
 
@@ -1938,9 +1972,12 @@
             # Events from async generators were already yielded in real-time above
             elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
                 for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(
-                        item, …
+                    # This function yields agent/team/workflow run events
+                    if isinstance(
+                        item,
+                        tuple(get_args(RunOutputEvent))
+                        + tuple(get_args(TeamRunOutputEvent))
+                        + tuple(get_args(WorkflowRunOutputEvent)),
                     ):
                         # We only capture content events
                         if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -2115,10 +2152,14 @@
         new_model = cls.__new__(cls)
         memo[id(self)] = new_model
 
-        # Deep copy all attributes
+        # Deep copy all attributes except client objects
        for k, v in self.__dict__.items():
             if k in {"response_format", "_tools", "_functions"}:
                 continue
+            # Skip client objects
+            if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+                setattr(new_model, k, None)
+                continue
             try:
                 setattr(new_model, k, deepcopy(v, memo))
             except Exception:
agno/models/cerebras/cerebras.py
CHANGED
@@ -63,7 +63,7 @@ class Cerebras(Model):
     max_retries: Optional[int] = None
     default_headers: Optional[Any] = None
     default_query: Optional[Any] = None
-    http_client: Optional[httpx.Client] = None
+    http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
 
     # Cerebras clients
@@ -102,12 +102,15 @@
         Returns:
             CerebrasClient: An instance of the Cerebras client.
         """
-        if self.client:
+        if self.client and not self.client.is_closed():
             return self.client
 
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client …
-            …
+        if self.http_client:
+            if isinstance(self.http_client, httpx.Client):
+                client_params["http_client"] = self.http_client
+            else:
+                log_debug("http_client is not an instance of httpx.Client.")
         self.client = CerebrasClient(**client_params)
         return self.client
 
@@ -118,13 +121,15 @@
         Returns:
             AsyncCerebras: An instance of the asynchronous Cerebras client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client:
+        if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
             client_params["http_client"] = self.http_client
         else:
+            if self.http_client:
+                log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
             # Create a new async HTTP client with custom limits
             client_params["http_client"] = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
agno/models/groq/groq.py
CHANGED
@@ -61,7 +61,7 @@ class Groq(Model):
     max_retries: Optional[int] = None
     default_headers: Optional[Any] = None
     default_query: Optional[Any] = None
-    http_client: Optional[httpx.Client] = None
+    http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
 
     # Groq clients
@@ -115,18 +115,21 @@
         Returns:
             AsyncGroqClient: An instance of the asynchronous Groq client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client:
+        if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
             client_params["http_client"] = self.http_client
         else:
+            if self.http_client:
+                log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
             # Create a new async HTTP client with custom limits
             client_params["http_client"] = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
             )
-        …
+        self.async_client = AsyncGroqClient(**client_params)
+        return self.async_client
 
     def get_request_params(
         self,
agno/models/meta/llama.py
CHANGED
@@ -61,7 +61,7 @@ class Llama(Model):
     max_retries: Optional[int] = None
     default_headers: Optional[Any] = None
     default_query: Optional[Any] = None
-    http_client: Optional[httpx.Client] = None
+    http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
 
     # OpenAI clients
@@ -104,8 +104,11 @@
             return self.client
 
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client …
-            …
+        if self.http_client:
+            if isinstance(self.http_client, httpx.Client):
+                client_params["http_client"] = self.http_client
+            else:
+                log_debug("http_client is not an instance of httpx.Client.")
         self.client = LlamaAPIClient(**client_params)
         return self.client
 
@@ -116,18 +119,21 @@
         Returns:
             AsyncLlamaAPIClient: An instance of the asynchronous Llama client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client:
+        if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
             client_params["http_client"] = self.http_client
         else:
+            if self.http_client:
+                log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
             # Create a new async HTTP client with custom limits
             client_params["http_client"] = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
             )
-        …
+        self.async_client = AsyncLlamaAPIClient(**client_params)
+        return self.async_client
 
     def get_request_params(
         self,
agno/models/meta/llama_openai.py
CHANGED
@@ -62,6 +62,9 @@ class LlamaOpenAI(OpenAILike):
 
     def get_async_client(self):
         """Override to provide custom httpx client that properly handles redirects"""
+        if self.async_client and not self.async_client.is_closed():
+            return self.async_client
+
         client_params = self._get_client_params()
 
         # Llama gives a 307 redirect error, so we need to set up a custom client to allow redirects
@@ -71,4 +74,5 @@
             timeout=httpx.Timeout(30.0),
         )
 
-        …
+        self.async_client = AsyncOpenAIClient(**client_params)
+        return self.async_client
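`LlamaOpenAI.get_async_client` now caches the client it builds and only rebuilds it once the cached one has been closed. A minimal sketch of that memoization guard using a plain httpx client (note that httpx exposes `is_closed` as a property, while the OpenAI-style SDK clients checked in the diff expose an `is_closed()` method; the class below is an illustrative stand-in):

```python
from typing import Optional

import httpx


class AsyncClientHolder:
    """Illustrative stand-in for a model class that lazily builds and caches an async client."""

    def __init__(self) -> None:
        self.async_client: Optional[httpx.AsyncClient] = None

    def get_async_client(self) -> httpx.AsyncClient:
        # Reuse the cached client unless it has been closed in the meantime.
        if self.async_client is not None and not self.async_client.is_closed:
            return self.async_client
        self.async_client = httpx.AsyncClient(
            follow_redirects=True,  # mirrors the redirect handling mentioned in the diff
            timeout=httpx.Timeout(30.0),
        )
        return self.async_client


holder = AsyncClientHolder()
assert holder.get_async_client() is holder.get_async_client()  # cached on repeat calls
```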
agno/models/openai/chat.py
CHANGED
@@ -14,6 +14,7 @@ from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
+from agno.run.team import TeamRunOutput
 from agno.utils.log import log_debug, log_error, log_warning
 from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
 from agno.utils.reasoning import extract_thinking_content
@@ -81,6 +82,10 @@ class OpenAIChat(Model):
     http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
 
+    # OpenAI clients
+    client: Optional[OpenAIClient] = None
+    async_client: Optional[AsyncOpenAIClient] = None
+
     # The role to map the message role to.
     default_role_map = {
         "system": "developer",
@@ -123,13 +128,18 @@
         Returns:
             OpenAIClient: An instance of the OpenAI client.
         """
+        if self.client and not self.client.is_closed():
+            return self.client
+
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client:
             if isinstance(self.http_client, httpx.Client):
                 client_params["http_client"] = self.http_client
             else:
-                …
-                …
+                log_debug("http_client is not an instance of httpx.Client.")
+
+        self.client = OpenAIClient(**client_params)
+        return self.client
 
     def get_async_client(self) -> AsyncOpenAIClient:
         """
@@ -138,29 +148,28 @@
         Returns:
             AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
         """
+        if self.async_client and not self.async_client.is_closed():
+            return self.async_client
+
         client_params: Dict[str, Any] = self._get_client_params()
-        if self.http_client:
-            …
-            client_params["http_client"] = self.http_client
-        else:
-            log_warning("http_client is not an instance of httpx.AsyncClient. Using default httpx.AsyncClient.")
-            # Create a new async HTTP client with custom limits
-            client_params["http_client"] = httpx.AsyncClient(
-                limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-            )
+        if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
+            client_params["http_client"] = self.http_client
         else:
+            if self.http_client:
+                log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
             # Create a new async HTTP client with custom limits
             client_params["http_client"] = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
             )
-        …
+        self.async_client = AsyncOpenAIClient(**client_params)
+        return self.async_client
 
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
@@ -347,7 +356,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> ModelResponse:
         """
         Send a chat completion request to the OpenAI API and parse the response.
@@ -427,7 +436,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the OpenAI API.
@@ -506,7 +515,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the OpenAI API.
@@ -582,7 +591,7 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the OpenAI API.