agno 2.2.5__py3-none-any.whl → 2.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. agno/agent/agent.py +500 -423
  2. agno/api/os.py +1 -1
  3. agno/culture/manager.py +12 -8
  4. agno/guardrails/prompt_injection.py +1 -0
  5. agno/knowledge/chunking/agentic.py +6 -2
  6. agno/knowledge/embedder/vllm.py +262 -0
  7. agno/knowledge/knowledge.py +37 -5
  8. agno/memory/manager.py +9 -4
  9. agno/models/anthropic/claude.py +1 -2
  10. agno/models/azure/ai_foundry.py +31 -14
  11. agno/models/azure/openai_chat.py +12 -4
  12. agno/models/base.py +106 -65
  13. agno/models/cerebras/cerebras.py +11 -6
  14. agno/models/groq/groq.py +7 -4
  15. agno/models/meta/llama.py +12 -6
  16. agno/models/meta/llama_openai.py +5 -1
  17. agno/models/openai/chat.py +26 -17
  18. agno/models/openai/responses.py +11 -63
  19. agno/models/requesty/requesty.py +5 -2
  20. agno/models/utils.py +254 -8
  21. agno/models/vertexai/claude.py +9 -13
  22. agno/os/app.py +13 -12
  23. agno/os/routers/evals/evals.py +8 -8
  24. agno/os/routers/evals/utils.py +1 -0
  25. agno/os/schema.py +56 -38
  26. agno/os/utils.py +27 -0
  27. agno/run/__init__.py +6 -0
  28. agno/run/agent.py +5 -0
  29. agno/run/base.py +18 -1
  30. agno/run/team.py +13 -9
  31. agno/run/workflow.py +39 -0
  32. agno/session/summary.py +8 -2
  33. agno/session/workflow.py +4 -3
  34. agno/team/team.py +302 -369
  35. agno/tools/exa.py +21 -16
  36. agno/tools/file.py +153 -25
  37. agno/tools/function.py +98 -17
  38. agno/tools/mcp/mcp.py +8 -1
  39. agno/tools/notion.py +204 -0
  40. agno/utils/agent.py +78 -0
  41. agno/utils/events.py +2 -0
  42. agno/utils/hooks.py +1 -1
  43. agno/utils/models/claude.py +25 -8
  44. agno/utils/print_response/workflow.py +115 -16
  45. agno/vectordb/__init__.py +2 -1
  46. agno/vectordb/milvus/milvus.py +5 -0
  47. agno/vectordb/redis/__init__.py +5 -0
  48. agno/vectordb/redis/redisdb.py +687 -0
  49. agno/workflow/__init__.py +2 -0
  50. agno/workflow/agent.py +299 -0
  51. agno/workflow/step.py +13 -2
  52. agno/workflow/workflow.py +969 -72
  53. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/METADATA +10 -3
  54. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/RECORD +57 -52
  55. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/WHEEL +0 -0
  56. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/licenses/LICENSE +0 -0
  57. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -31,7 +31,8 @@ from agno.models.metrics import Metrics
  from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
  from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
  from agno.run.team import RunContentEvent as TeamRunContentEvent
- from agno.run.team import TeamRunOutputEvent
+ from agno.run.team import TeamRunOutput, TeamRunOutputEvent
+ from agno.run.workflow import WorkflowRunOutputEvent
  from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
  from agno.utils.log import log_debug, log_error, log_info, log_warning
  from agno.utils.timer import Timer
@@ -52,6 +53,8 @@ class MessageData:
  response_video: Optional[Video] = None
  response_file: Optional[File] = None

+ response_metrics: Optional[Metrics] = None
+
  # Data from the provider that we might need on subsequent messages
  response_provider_data: Optional[Dict[str, Any]] = None

@@ -307,7 +310,7 @@ class Model(ABC):
  tools: Optional[List[Union[Function, dict]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
  ) -> ModelResponse:
  """
@@ -481,6 +484,7 @@ class Model(ABC):
  tools: Optional[List[Union[Function, dict]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
  ) -> ModelResponse:
  """
@@ -516,6 +520,7 @@ class Model(ABC):
  response_format=response_format,
  tools=_tool_dicts,
  tool_choice=tool_choice or self._tool_choice,
+ run_response=run_response,
  )

  # Add assistant message to messages
@@ -643,7 +648,7 @@ class Model(ABC):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> None:
  """
  Process a single model response and return the assistant message and whether to continue.
@@ -696,7 +701,7 @@ class Model(ABC):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> None:
  """
  Process a single async model response and return the assistant message and whether to continue.
@@ -756,7 +761,6 @@ class Model(ABC):
  Returns:
  Message: The populated assistant message
  """
- # Add role to assistant message
  if provider_response.role is not None:
  assistant_message.role = provider_response.role

@@ -820,7 +824,7 @@ class Model(ABC):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> Iterator[ModelResponse]:
  """
  Process a streaming response from the model.
@@ -834,14 +838,14 @@ class Model(ABC):
  tool_choice=tool_choice or self._tool_choice,
  run_response=run_response,
  ):
- yield from self._populate_stream_data_and_assistant_message(
+ for model_response_delta in self._populate_stream_data(
  stream_data=stream_data,
- assistant_message=assistant_message,
  model_response_delta=response_delta,
- )
+ ):
+ yield model_response_delta

- # Add final metrics to assistant message
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
+ # Populate assistant message from stream data after the stream ends
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)

  def response_stream(
  self,
@@ -851,7 +855,7 @@
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
  stream_model_response: bool = True,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
  ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
  """
@@ -905,22 +909,6 @@
  streaming_responses.append(response)
  yield response

- # Populate assistant message from stream data
- if stream_data.response_content:
- assistant_message.content = stream_data.response_content
- if stream_data.response_reasoning_content:
- assistant_message.reasoning_content = stream_data.response_reasoning_content
- if stream_data.response_redacted_reasoning_content:
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
- if stream_data.response_provider_data:
- assistant_message.provider_data = stream_data.response_provider_data
- if stream_data.response_citations:
- assistant_message.citations = stream_data.response_citations
- if stream_data.response_audio:
- assistant_message.audio_output = stream_data.response_audio
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
  else:
  self._process_model_response(
  messages=messages,
@@ -1019,7 +1007,7 @@
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> AsyncIterator[ModelResponse]:
  """
  Process a streaming response from the model.
@@ -1032,15 +1020,14 @@
  tool_choice=tool_choice or self._tool_choice,
  run_response=run_response,
  ): # type: ignore
- for model_response in self._populate_stream_data_and_assistant_message(
+ for model_response_delta in self._populate_stream_data(
  stream_data=stream_data,
- assistant_message=assistant_message,
  model_response_delta=response_delta,
  ):
- yield model_response
+ yield model_response_delta

- # Populate the assistant message
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
+ # Populate assistant message from stream data after the stream ends
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)

  async def aresponse_stream(
  self,
@@ -1050,7 +1037,7 @@
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
  stream_model_response: bool = True,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
  ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
  """
@@ -1104,20 +1091,6 @@
  streaming_responses.append(model_response)
  yield model_response

- # Populate assistant message from stream data
- if stream_data.response_content:
- assistant_message.content = stream_data.response_content
- if stream_data.response_reasoning_content:
- assistant_message.reasoning_content = stream_data.response_reasoning_content
- if stream_data.response_redacted_reasoning_content:
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
- if stream_data.response_provider_data:
- assistant_message.provider_data = stream_data.response_provider_data
- if stream_data.response_audio:
- assistant_message.audio_output = stream_data.response_audio
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
  else:
  await self._aprocess_model_response(
  messages=messages,
@@ -1209,15 +1182,51 @@
  if self.cache_response and cache_key and streaming_responses:
  self._save_streaming_responses_to_cache(cache_key, streaming_responses)

- def _populate_stream_data_and_assistant_message(
- self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
+ def _populate_assistant_message_from_stream_data(
+ self, assistant_message: Message, stream_data: MessageData
+ ) -> None:
+ """
+ Populate an assistant message with the stream data.
+ """
+ if stream_data.response_role is not None:
+ assistant_message.role = stream_data.response_role
+ if stream_data.response_metrics is not None:
+ assistant_message.metrics = stream_data.response_metrics
+ if stream_data.response_content:
+ assistant_message.content = stream_data.response_content
+ if stream_data.response_reasoning_content:
+ assistant_message.reasoning_content = stream_data.response_reasoning_content
+ if stream_data.response_redacted_reasoning_content:
+ assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+ if stream_data.response_provider_data:
+ assistant_message.provider_data = stream_data.response_provider_data
+ if stream_data.response_citations:
+ assistant_message.citations = stream_data.response_citations
+ if stream_data.response_audio:
+ assistant_message.audio_output = stream_data.response_audio
+ if stream_data.response_image:
+ assistant_message.image_output = stream_data.response_image
+ if stream_data.response_video:
+ assistant_message.video_output = stream_data.response_video
+ if stream_data.response_file:
+ assistant_message.file_output = stream_data.response_file
+ if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+ assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+
+ def _populate_stream_data(
+ self, stream_data: MessageData, model_response_delta: ModelResponse
  ) -> Iterator[ModelResponse]:
  """Update the stream data and assistant message with the model response."""
- # Add role to assistant message
- if model_response_delta.role is not None:
- assistant_message.role = model_response_delta.role

  should_yield = False
+ if model_response_delta.role is not None:
+ stream_data.response_role = model_response_delta.role # type: ignore
+
+ if model_response_delta.response_usage is not None:
+ if stream_data.response_metrics is None:
+ stream_data.response_metrics = Metrics()
+ stream_data.response_metrics += model_response_delta.response_usage
+
  # Update stream_data content
  if model_response_delta.content is not None:
  stream_data.response_content += model_response_delta.content
@@ -1440,11 +1449,13 @@

  if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
  for item in function_execution_result.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
+ # This function yields agent/team/workflow run events
+ if (
+ isinstance(item, tuple(get_args(RunOutputEvent)))
+ or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+ or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
  ):
- # We only capture content events
+ # We only capture content events for output accumulation
  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
  if item.content is not None and isinstance(item.content, BaseModel):
  function_call_output += item.content.model_dump_json()
@@ -1458,6 +1469,16 @@
  if isinstance(item, CustomEvent):
  function_call_output += str(item)

+ # For WorkflowCompletedEvent, extract content for final output
+ from agno.run.workflow import WorkflowCompletedEvent
+
+ if isinstance(item, WorkflowCompletedEvent):
+ if item.content is not None:
+ if isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ function_call_output += str(item.content)
+
  # Yield the event itself to bubble it up
  yield item

@@ -1829,9 +1850,12 @@

  try:
  async for item in function_call.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
+ # This function yields agent/team/workflow run events
+ if isinstance(
+ item,
+ tuple(get_args(RunOutputEvent))
+ + tuple(get_args(TeamRunOutputEvent))
+ + tuple(get_args(WorkflowRunOutputEvent)),
  ):
  # We only capture content events
  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1848,6 +1872,16 @@
  if isinstance(item, CustomEvent):
  function_call_output += str(item)

+ # For WorkflowCompletedEvent, extract content for final output
+ from agno.run.workflow import WorkflowCompletedEvent
+
+ if isinstance(item, WorkflowCompletedEvent):
+ if item.content is not None:
+ if isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ function_call_output += str(item.content)
+
  # Put the event into the queue to be yielded
  await event_queue.put(item)

@@ -1938,9 +1972,12 @@
  # Events from async generators were already yielded in real-time above
  elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
  for item in function_call.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
+ # This function yields agent/team/workflow run events
+ if isinstance(
+ item,
+ tuple(get_args(RunOutputEvent))
+ + tuple(get_args(TeamRunOutputEvent))
+ + tuple(get_args(WorkflowRunOutputEvent)),
  ):
  # We only capture content events
  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -2115,10 +2152,14 @@
  new_model = cls.__new__(cls)
  memo[id(self)] = new_model

- # Deep copy all attributes
+ # Deep copy all attributes except client objects
  for k, v in self.__dict__.items():
  if k in {"response_format", "_tools", "_functions"}:
  continue
+ # Skip client objects
+ if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+ setattr(new_model, k, None)
+ continue
  try:
  setattr(new_model, k, deepcopy(v, memo))
  except Exception:
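
The net effect of the streaming changes above: per-delta writes to the assistant message are gone, deltas accumulate in MessageData (now including the role and usage metrics), and the assistant Message is populated once after the stream ends. Below is a minimal sketch of that accumulate-then-populate flow, using simplified stand-in classes rather than agno's actual Message, MessageData, and Metrics types.

# Simplified illustration of the accumulate-then-populate streaming pattern.
# Delta, StreamData, and AssistantMessage are stand-ins, not agno's real types.
from dataclasses import dataclass
from typing import Iterator, Optional


@dataclass
class Delta:  # stands in for a ModelResponse delta
    role: Optional[str] = None
    content: Optional[str] = None
    response_usage: int = 0  # stands in for a Metrics object


@dataclass
class StreamData:  # stands in for MessageData
    response_role: Optional[str] = None
    response_content: str = ""
    response_metrics: int = 0


@dataclass
class AssistantMessage:  # stands in for Message
    role: str = "assistant"
    content: str = ""
    metrics: int = 0


def populate_stream_data(stream_data: StreamData, delta: Delta) -> Iterator[Delta]:
    # Every delta updates the stream buffer; only content deltas are yielded onward.
    if delta.role is not None:
        stream_data.response_role = delta.role
    stream_data.response_metrics += delta.response_usage
    if delta.content:
        stream_data.response_content += delta.content
        yield delta


def populate_assistant_message_from_stream_data(msg: AssistantMessage, stream_data: StreamData) -> None:
    # One final pass copies the accumulated buffer into the assistant message.
    if stream_data.response_role is not None:
        msg.role = stream_data.response_role
    if stream_data.response_content:
        msg.content = stream_data.response_content
    msg.metrics = stream_data.response_metrics


deltas = [Delta(role="assistant"), Delta(content="Hello, "), Delta(content="world", response_usage=7)]
stream_data, message = StreamData(), AssistantMessage()
for delta in deltas:
    for _ in populate_stream_data(stream_data, delta):
        pass  # a real caller would yield these to the consumer
populate_assistant_message_from_stream_data(message, stream_data)
assert message.content == "Hello, world" and message.metrics == 7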
agno/models/cerebras/cerebras.py CHANGED
@@ -63,7 +63,7 @@ class Cerebras(Model):
  max_retries: Optional[int] = None
  default_headers: Optional[Any] = None
  default_query: Optional[Any] = None
- http_client: Optional[httpx.Client] = None
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
  client_params: Optional[Dict[str, Any]] = None

  # Cerebras clients
@@ -102,12 +102,15 @@ class Cerebras(Model):
  Returns:
  CerebrasClient: An instance of the Cerebras client.
  """
- if self.client:
+ if self.client and not self.client.is_closed():
  return self.client

  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client is not None:
- client_params["http_client"] = self.http_client
+ if self.http_client:
+ if isinstance(self.http_client, httpx.Client):
+ client_params["http_client"] = self.http_client
+ else:
+ log_debug("http_client is not an instance of httpx.Client.")
  self.client = CerebrasClient(**client_params)
  return self.client

@@ -118,13 +121,15 @@ class Cerebras(Model):
  Returns:
  AsyncCerebras: An instance of the asynchronous Cerebras client.
  """
- if self.async_client:
+ if self.async_client and not self.async_client.is_closed():
  return self.async_client

  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client:
+ if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
  client_params["http_client"] = self.http_client
  else:
+ if self.http_client:
+ log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
  # Create a new async HTTP client with custom limits
  client_params["http_client"] = httpx.AsyncClient(
  limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
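
The guard pattern introduced here, and repeated for the other providers below, is: reuse the cached SDK client unless it has been closed, and only forward http_client when it matches the type the synchronous or asynchronous SDK client expects. A minimal sketch of that guard; FakeSDKClient and ProviderModel are illustrative stand-ins, not agno or Cerebras classes.

# Sketch of the client-reuse guard, with stand-in classes instead of real SDK clients.
from typing import Any, Dict, Optional, Union

import httpx


class FakeSDKClient:
    # Stand-in for a provider SDK client (CerebrasClient, GroqClient, OpenAIClient, ...).
    def __init__(self, http_client: Optional[httpx.Client] = None) -> None:
        self._http = http_client or httpx.Client()

    def is_closed(self) -> bool:
        return self._http.is_closed


class ProviderModel:
    # Stand-in for an agno model class holding an optional shared httpx client.
    def __init__(self, http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None) -> None:
        self.http_client = http_client
        self.client: Optional[FakeSDKClient] = None

    def get_client(self) -> FakeSDKClient:
        # Reuse the cached client unless its connection pool has been closed.
        if self.client and not self.client.is_closed():
            return self.client

        client_params: Dict[str, Any] = {}
        # Only forward http_client when it is the synchronous httpx.Client the sync SDK expects.
        if isinstance(self.http_client, httpx.Client):
            client_params["http_client"] = self.http_client

        self.client = FakeSDKClient(**client_params)
        return self.client


model = ProviderModel(http_client=httpx.Client())
assert model.get_client() is model.get_client()  # cached, not rebuilt on every call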
agno/models/groq/groq.py CHANGED
@@ -61,7 +61,7 @@ class Groq(Model):
  max_retries: Optional[int] = None
  default_headers: Optional[Any] = None
  default_query: Optional[Any] = None
- http_client: Optional[httpx.Client] = None
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
  client_params: Optional[Dict[str, Any]] = None

  # Groq clients
@@ -115,18 +115,21 @@ class Groq(Model):
  Returns:
  AsyncGroqClient: An instance of the asynchronous Groq client.
  """
- if self.async_client:
+ if self.async_client and not self.async_client.is_closed():
  return self.async_client

  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client:
+ if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
  client_params["http_client"] = self.http_client
  else:
+ if self.http_client:
+ log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
  # Create a new async HTTP client with custom limits
  client_params["http_client"] = httpx.AsyncClient(
  limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
  )
- return AsyncGroqClient(**client_params)
+ self.async_client = AsyncGroqClient(**client_params)
+ return self.async_client

  def get_request_params(
  self,
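
Because http_client now accepts either an httpx.Client or an httpx.AsyncClient, and the async Groq client is cached rather than rebuilt on every call, a single shared AsyncClient can back all requests. A usage sketch, assuming GROQ_API_KEY is set; the model id is illustrative, and the agent-level calls are ordinary agno usage rather than part of this diff.

# Usage sketch: sharing one httpx.AsyncClient with the Groq model.
import asyncio

import httpx

from agno.agent import Agent
from agno.models.groq import Groq


async def main() -> None:
    # One connection pool for every request made through this model.
    async with httpx.AsyncClient(limits=httpx.Limits(max_connections=50)) as http_client:
        agent = Agent(model=Groq(id="llama-3.3-70b-versatile", http_client=http_client))
        result = await agent.arun("Say hello in one word.")
        print(result.content)


asyncio.run(main())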
agno/models/meta/llama.py CHANGED
@@ -61,7 +61,7 @@ class Llama(Model):
  max_retries: Optional[int] = None
  default_headers: Optional[Any] = None
  default_query: Optional[Any] = None
- http_client: Optional[httpx.Client] = None
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
  client_params: Optional[Dict[str, Any]] = None

  # OpenAI clients
@@ -104,8 +104,11 @@ class Llama(Model):
  return self.client

  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client is not None:
- client_params["http_client"] = self.http_client
+ if self.http_client:
+ if isinstance(self.http_client, httpx.Client):
+ client_params["http_client"] = self.http_client
+ else:
+ log_debug("http_client is not an instance of httpx.Client.")
  self.client = LlamaAPIClient(**client_params)
  return self.client

@@ -116,18 +119,21 @@ class Llama(Model):
  Returns:
  AsyncLlamaAPIClient: An instance of the asynchronous Llama client.
  """
- if self.async_client:
+ if self.async_client and not self.async_client.is_closed():
  return self.async_client

  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client:
+ if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
  client_params["http_client"] = self.http_client
  else:
+ if self.http_client:
+ log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
  # Create a new async HTTP client with custom limits
  client_params["http_client"] = httpx.AsyncClient(
  limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
  )
- return AsyncLlamaAPIClient(**client_params)
+ self.async_client = AsyncLlamaAPIClient(**client_params)
+ return self.async_client

  def get_request_params(
  self,
agno/models/meta/llama_openai.py CHANGED
@@ -62,6 +62,9 @@ class LlamaOpenAI(OpenAILike):

  def get_async_client(self):
  """Override to provide custom httpx client that properly handles redirects"""
+ if self.async_client and not self.async_client.is_closed():
+ return self.async_client
+
  client_params = self._get_client_params()

  # Llama gives a 307 redirect error, so we need to set up a custom client to allow redirects
@@ -71,4 +74,5 @@
  timeout=httpx.Timeout(30.0),
  )

- return AsyncOpenAIClient(**client_params)
+ self.async_client = AsyncOpenAIClient(**client_params)
+ return self.async_client
agno/models/openai/chat.py CHANGED
@@ -14,6 +14,7 @@ from agno.models.message import Message
  from agno.models.metrics import Metrics
  from agno.models.response import ModelResponse
  from agno.run.agent import RunOutput
+ from agno.run.team import TeamRunOutput
  from agno.utils.log import log_debug, log_error, log_warning
  from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
  from agno.utils.reasoning import extract_thinking_content
@@ -81,6 +82,10 @@ class OpenAIChat(Model):
  http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
  client_params: Optional[Dict[str, Any]] = None

+ # OpenAI clients
+ client: Optional[OpenAIClient] = None
+ async_client: Optional[AsyncOpenAIClient] = None
+
  # The role to map the message role to.
  default_role_map = {
  "system": "developer",
@@ -123,13 +128,18 @@ class OpenAIChat(Model):
  Returns:
  OpenAIClient: An instance of the OpenAI client.
  """
+ if self.client and not self.client.is_closed():
+ return self.client
+
  client_params: Dict[str, Any] = self._get_client_params()
  if self.http_client:
  if isinstance(self.http_client, httpx.Client):
  client_params["http_client"] = self.http_client
  else:
- log_warning("http_client is not an instance of httpx.Client.")
- return OpenAIClient(**client_params)
+ log_debug("http_client is not an instance of httpx.Client.")
+
+ self.client = OpenAIClient(**client_params)
+ return self.client

  def get_async_client(self) -> AsyncOpenAIClient:
  """
@@ -138,29 +148,28 @@ class OpenAIChat(Model):
  Returns:
  AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
  """
+ if self.async_client and not self.async_client.is_closed():
+ return self.async_client
+
  client_params: Dict[str, Any] = self._get_client_params()
- if self.http_client:
- if isinstance(self.http_client, httpx.AsyncClient):
- client_params["http_client"] = self.http_client
- else:
- log_warning("http_client is not an instance of httpx.AsyncClient. Using default httpx.AsyncClient.")
- # Create a new async HTTP client with custom limits
- client_params["http_client"] = httpx.AsyncClient(
- limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
- )
+ if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
+ client_params["http_client"] = self.http_client
  else:
+ if self.http_client:
+ log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
  # Create a new async HTTP client with custom limits
  client_params["http_client"] = httpx.AsyncClient(
  limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
  )
- return AsyncOpenAIClient(**client_params)
+ self.async_client = AsyncOpenAIClient(**client_params)
+ return self.async_client

  def get_request_params(
  self,
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> Dict[str, Any]:
  """
  Returns keyword arguments for API requests.
@@ -347,7 +356,7 @@ class OpenAIChat(Model):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> ModelResponse:
  """
  Send a chat completion request to the OpenAI API and parse the response.
@@ -427,7 +436,7 @@ class OpenAIChat(Model):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> ModelResponse:
  """
  Sends an asynchronous chat completion request to the OpenAI API.
@@ -506,7 +515,7 @@ class OpenAIChat(Model):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> Iterator[ModelResponse]:
  """
  Send a streaming chat completion request to the OpenAI API.
@@ -582,7 +591,7 @@ class OpenAIChat(Model):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  ) -> AsyncIterator[ModelResponse]:
  """
  Sends an asynchronous streaming chat completion request to the OpenAI API.
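
OpenAIChat now declares client and async_client attributes and returns the cached instance from get_client()/get_async_client() while the client remains open. A small sketch of the observable effect, assuming OPENAI_API_KEY is set; the model id is illustrative.

# Sketch: repeated calls reuse one cached OpenAI client instead of building a new one per call.
from agno.models.openai import OpenAIChat

model = OpenAIChat(id="gpt-4o-mini")
first = model.get_client()
second = model.get_client()
assert first is second  # same cached client while it stays open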