agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/message.py CHANGED
@@ -1,6 +1,7 @@
  import json
  from time import time
  from typing import Any, Dict, List, Optional, Sequence, Union
+ from uuid import uuid4

  from pydantic import BaseModel, ConfigDict, Field

@@ -41,6 +42,9 @@ class Citations(BaseModel):
      # Raw citations from the model
      raw: Optional[Any] = None

+     # Search queries used to retrieve the citations
+     search_queries: Optional[List[str]] = None
+
      # URLs of the citations.
      urls: Optional[List[UrlCitation]] = None

@@ -51,11 +55,16 @@
  class Message(BaseModel):
      """Message sent to the Model"""

+     id: str = Field(default_factory=lambda: str(uuid4()))
+
      # The role of the message author.
      # One of system, user, assistant, or tool.
      role: str
      # The contents of the message.
      content: Optional[Union[List[Any], str]] = None
+     # Compressed content of the message
+     compressed_content: Optional[str] = None
+
      # An optional name for the participant.
      # Provides the model information to differentiate between participants of the same role.
      name: Optional[str] = None
@@ -106,6 +115,8 @@ class Message(BaseModel):
      references: Optional[MessageReferences] = None
      # The Unix timestamp the message was created.
      created_at: int = Field(default_factory=lambda: int(time()))
+     # When True, the message will be sent to the Model but not persisted afterwards.
+     temporary: bool = False

      model_config = ConfigDict(extra="allow", populate_by_name=True, arbitrary_types_allowed=True)

@@ -120,6 +131,12 @@ class Message(BaseModel):
              return json.dumps(self.content)
          return ""

+     def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+         """Return tool result content to send to API"""
+         if use_compressed_content and self.compressed_content is not None:
+             return self.compressed_content
+         return self.content
+
      @classmethod
      def from_dict(cls, data: Dict[str, Any]) -> "Message":
          # Handle image reconstruction properly
@@ -259,9 +276,11 @@ class Message(BaseModel):
      def to_dict(self) -> Dict[str, Any]:
          """Returns the message as a dictionary."""
          message_dict = {
+             "id": self.id,
              "content": self.content,
              "reasoning_content": self.reasoning_content,
              "from_history": self.from_history,
+             "compressed_content": self.compressed_content,
              "stop_after_tool_call": self.stop_after_tool_call,
              "role": self.role,
              "name": self.name,
@@ -311,13 +330,14 @@ class Message(BaseModel):
              "created_at": self.created_at,
          }

-     def log(self, metrics: bool = True, level: Optional[str] = None):
+     def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
          """Log the message to the console

          Args:
              metrics (bool): Whether to log the metrics.
              level (str): The level to log the message at. One of debug, info, warning, or error.
                  Defaults to debug.
+             use_compressed_content (bool): Whether to use compressed content.
          """
          _logger = log_debug
          if level == "info":
@@ -344,10 +364,13 @@ class Message(BaseModel):
          if self.reasoning_content:
              _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
          if self.content:
-             if isinstance(self.content, str) or isinstance(self.content, list):
-                 _logger(self.content)
-             elif isinstance(self.content, dict):
-                 _logger(json.dumps(self.content, indent=2))
+             if use_compressed_content and self.compressed_content:
+                 _logger("Compressed content:\n" + self.compressed_content)
+             else:
+                 if isinstance(self.content, str) or isinstance(self.content, list):
+                     _logger(self.content)
+                 elif isinstance(self.content, dict):
+                     _logger(json.dumps(self.content, indent=2))
          if self.tool_calls:
              tool_calls_list = ["Tool Calls:"]
              for tool_call in self.tool_calls:
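The new compression fields compose as follows — a minimal sketch using only the Message API shown in the diff above (compressed_content, get_content, and the temporary flag):

    from agno.models.message import Message

    # A tool result with a model-generated summary attached
    msg = Message(role="tool", content="...very long tool output...")
    msg.compressed_content = "short summary of the tool output"

    msg.get_content()                              # full content (default)
    msg.get_content(use_compressed_content=True)   # the compressed summary

    # Every Message now also carries a uuid4 id, and messages marked temporary
    # are sent to the model but not persisted afterwards.
    scratch = Message(role="user", content="one-off hint", temporary=True)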
agno/models/meta/llama.py CHANGED
@@ -12,6 +12,7 @@ from agno.models.message import Message
  from agno.models.metrics import Metrics
  from agno.models.response import ModelResponse
  from agno.run.agent import RunOutput
+ from agno.utils.http import get_default_async_client, get_default_sync_client
  from agno.utils.log import log_debug, log_error, log_warning
  from agno.utils.models.llama import format_message

@@ -61,7 +62,7 @@ class Llama(Model):
      max_retries: Optional[int] = None
      default_headers: Optional[Any] = None
      default_query: Optional[Any] = None
-     http_client: Optional[httpx.Client] = None
+     http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
      client_params: Optional[Dict[str, Any]] = None

      # OpenAI clients
@@ -104,8 +105,16 @@ class Llama(Model):
              return self.client

          client_params: Dict[str, Any] = self._get_client_params()
-         if self.http_client is not None:
-             client_params["http_client"] = self.http_client
+         if self.http_client:
+             if isinstance(self.http_client, httpx.Client):
+                 client_params["http_client"] = self.http_client
+             else:
+                 log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
+                 # Use global sync client when user http_client is invalid
+                 client_params["http_client"] = get_default_sync_client()
+         else:
+             # Use global sync client when no custom http_client is provided
+             client_params["http_client"] = get_default_sync_client()
          self.client = LlamaAPIClient(**client_params)
          return self.client

@@ -116,18 +125,26 @@ class Llama(Model):
          Returns:
              AsyncLlamaAPIClient: An instance of the asynchronous Llama client.
          """
-         if self.async_client:
+         if self.async_client and not self.async_client.is_closed():
              return self.async_client

          client_params: Dict[str, Any] = self._get_client_params()
          if self.http_client:
-             client_params["http_client"] = self.http_client
+             if isinstance(self.http_client, httpx.AsyncClient):
+                 client_params["http_client"] = self.http_client
+             else:
+                 log_warning(
+                     "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
+                 )
+                 # Use global async client when user http_client is invalid
+                 client_params["http_client"] = get_default_async_client()
          else:
-             # Create a new async HTTP client with custom limits
-             client_params["http_client"] = httpx.AsyncClient(
-                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-             )
-         return AsyncLlamaAPIClient(**client_params)
+             # Use global async client when no custom http_client is provided
+             client_params["http_client"] = get_default_async_client()
+
+         # Create and cache the client
+         self.async_client = AsyncLlamaAPIClient(**client_params)
+         return self.async_client

      def get_request_params(
          self,
@@ -200,6 +217,7 @@ class Llama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Send a chat completion request to the Llama API.
@@ -208,7 +226,10 @@ class Llama(Model):

          provider_response = self.get_client().chat.completions.create(
              model=self.id,
-             messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+             messages=[
+                 format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                 for m in messages
+             ],
              **self.get_request_params(tools=tools, response_format=response_format),
          )

@@ -225,6 +246,7 @@ class Llama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Sends an asynchronous chat completion request to the Llama API.
@@ -236,7 +258,10 @@ class Llama(Model):

          provider_response = await self.get_async_client().chat.completions.create(
              model=self.id,
-             messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+             messages=[
+                 format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                 for m in messages
+             ],
              **self.get_request_params(tools=tools, response_format=response_format),
          )

@@ -253,6 +278,7 @@ class Llama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> Iterator[ModelResponse]:
          """
          Send a streaming chat completion request to the Llama API.
@@ -265,7 +291,10 @@ class Llama(Model):

          for chunk in self.get_client().chat.completions.create(
              model=self.id,
-             messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+             messages=[
+                 format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                 for m in messages
+             ],
              stream=True,
              **self.get_request_params(tools=tools, response_format=response_format),
          ):
@@ -285,6 +314,7 @@ class Llama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> AsyncIterator[ModelResponse]:
          """
          Sends an asynchronous streaming chat completion request to the Llama API.
@@ -297,7 +327,10 @@ class Llama(Model):
          try:
              async for chunk in await self.get_async_client().chat.completions.create(
                  model=self.id,
-                 messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+                 messages=[
+                     format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                     for m in messages
+                 ],
                  stream=True,
                  **self.get_request_params(tools=tools, response_format=response_format),
              ):
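The client selection above follows one pattern throughout: accept either httpx client type, keep it only if it matches the transport being built, and otherwise fall back to the shared default from agno.utils.http. A minimal sketch of that pattern in isolation (resolve_http_client is a hypothetical helper, not part of the diff; get_default_sync_client and log_warning are the imports shown above):

    import httpx

    from agno.utils.http import get_default_sync_client
    from agno.utils.log import log_warning


    def resolve_http_client(user_client: object) -> httpx.Client:
        # Keep the user's client only if it is the right type for a sync
        # transport; otherwise fall back to the shared default client,
        # mirroring Llama.get_client above.
        if isinstance(user_client, httpx.Client):
            return user_client
        if user_client is not None:
            log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
        return get_default_sync_client()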
agno/models/meta/llama_openai.py CHANGED
@@ -1,14 +1,13 @@
- from dataclasses import dataclass, field
+ from dataclasses import dataclass
  from os import getenv
  from typing import Any, Dict, Optional

- import httpx
-
  try:
      from openai import AsyncOpenAI as AsyncOpenAIClient
  except ImportError:
      raise ImportError("`openai` not installed. Please install using `pip install openai`")

+ from agno.exceptions import ModelAuthenticationError
  from agno.models.meta.llama import Message
  from agno.models.openai.like import OpenAILike
  from agno.utils.models.llama import format_message
@@ -31,7 +30,7 @@ class LlamaOpenAI(OpenAILike):
      name: str = "LlamaOpenAI"
      provider: str = "LlamaOpenAI"

-     api_key: Optional[str] = field(default_factory=lambda: getenv("LLAMA_API_KEY"))
+     api_key: Optional[str] = None
      base_url: Optional[str] = "https://api.llama.com/compat/v1/"

      # Request parameters
@@ -48,6 +47,25 @@ class LlamaOpenAI(OpenAILike):
      supports_native_structured_outputs: bool = False
      supports_json_schema_outputs: bool = True

+     # Cached async client
+     openai_async_client: Optional[AsyncOpenAIClient] = None
+
+     def _get_client_params(self) -> Dict[str, Any]:
+         """
+         Returns client parameters for API requests, checking for LLAMA_API_KEY.
+
+         Returns:
+             Dict[str, Any]: A dictionary of client parameters for API requests.
+         """
+         if not self.api_key:
+             self.api_key = getenv("LLAMA_API_KEY")
+         if not self.api_key:
+             raise ModelAuthenticationError(
+                 message="LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.",
+                 model_name=self.name,
+             )
+         return super()._get_client_params()
+
      def _format_message(self, message: Message) -> Dict[str, Any]:
          """
          Format a message into the format expected by Llama API.
@@ -59,16 +77,3 @@ class LlamaOpenAI(OpenAILike):
              Dict[str, Any]: The formatted message.
          """
          return format_message(message, openai_like=True)
-
-     def get_async_client(self):
-         """Override to provide custom httpx client that properly handles redirects"""
-         client_params = self._get_client_params()
-
-         # Llama gives a 307 redirect error, so we need to set up a custom client to allow redirects
-         client_params["http_client"] = httpx.AsyncClient(
-             limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100),
-             follow_redirects=True,
-             timeout=httpx.Timeout(30.0),
-         )
-
-         return AsyncOpenAIClient(**client_params)
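Net effect of the LlamaOpenAI changes: the API key is no longer captured at dataclass definition time, and the bespoke redirect-following httpx.AsyncClient override is gone. Key resolution now happens lazily when client params are built — a sketch of the resulting behavior under those assumptions (the placeholder key value is hypothetical):

    import os

    from agno.models.meta.llama_openai import LlamaOpenAI

    model = LlamaOpenAI()  # no key lookup yet; api_key defaults to None

    # _get_client_params() reads LLAMA_API_KEY on first use and raises
    # ModelAuthenticationError if it is still unset.
    os.environ["LLAMA_API_KEY"] = "llama-..."  # hypothetical placeholder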
agno/models/mistral/mistral.py CHANGED
@@ -174,11 +174,12 @@ class MistralChat(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Send a chat completion request to the Mistral model.
          """
-         mistral_messages = format_messages(messages)
+         mistral_messages = format_messages(messages, compress_tool_results)
          try:
              response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
              if (
@@ -229,11 +230,12 @@ class MistralChat(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> Iterator[ModelResponse]:
          """
          Stream the response from the Mistral model.
          """
-         mistral_messages = format_messages(messages)
+         mistral_messages = format_messages(messages, compress_tool_results)

          if run_response and run_response.metrics:
              run_response.metrics.set_time_to_first_token()
@@ -265,11 +267,12 @@ class MistralChat(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Send an asynchronous chat completion request to the Mistral API.
          """
-         mistral_messages = format_messages(messages)
+         mistral_messages = format_messages(messages, compress_tool_results)
          try:
              response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
              if (
@@ -316,11 +319,12 @@ class MistralChat(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> AsyncIterator[ModelResponse]:
          """
          Stream an asynchronous response from the Mistral API.
          """
-         mistral_messages = format_messages(messages)
+         mistral_messages = format_messages(messages, compress_tool_results)
          try:
              if run_response and run_response.metrics:
                  run_response.metrics.set_time_to_first_token()
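Each MistralChat entry point now threads compress_tool_results into format_messages. The per-message effect this enables is the one shown concretely for Ollama further below: tool messages swap in their compressed content. A hedged sketch of that per-message rule, using only Message.get_content from the message.py diff (format_one is a hypothetical helper; Mistral's actual format_messages body is not shown in this diff):

    from agno.models.message import Message


    def format_one(message: Message, compress_tool_results: bool = False):
        # Tool results may be replaced by their compressed form; all other
        # roles always send their original content.
        if message.role == "tool":
            return message.get_content(use_compressed_content=compress_tool_results)
        return message.content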
agno/models/nebius/nebius.py CHANGED
@@ -2,36 +2,35 @@ from dataclasses import dataclass, field
  from os import getenv
  from typing import Any, Dict, Optional

- from agno.exceptions import ModelProviderError
+ from agno.exceptions import ModelAuthenticationError
  from agno.models.openai.like import OpenAILike


  @dataclass
  class Nebius(OpenAILike):
      """
-     A class for interacting with Nebius AI Studio models.
+     A class for interacting with Nebius Token Factory models.

      Attributes:
          id (str): The model id. Defaults to "Qwen/Qwen3-235B-A22B"".
          name (str): The model name. Defaults to "Nebius".
          provider (str): The provider name. Defaults to "Nebius".
          api_key (Optional[str]): The API key.
-         base_url (str): The base URL. Defaults to "https://api.studio.nebius.com/v1".
+         base_url (str): The base URL. Defaults to "https://api.tokenfactory.nebius.com/v1".
      """

-     id: str = "Qwen/Qwen3-4B-fast"  # Default model for chat
+     id: str = "openai/gpt-oss-20b"  # Default model for chat
      name: str = "Nebius"
      provider: str = "Nebius"

      api_key: Optional[str] = field(default_factory=lambda: getenv("NEBIUS_API_KEY"))
-     base_url: str = "https://api.studio.nebius.com/v1/"
+     base_url: str = "https://api.tokenfactory.nebius.com/v1/"

      def _get_client_params(self) -> Dict[str, Any]:
          if not self.api_key:
-             raise ModelProviderError(
+             raise ModelAuthenticationError(
                  message="NEBIUS_API_KEY not set. Please set the NEBIUS_API_KEY environment variable.",
                  model_name=self.name,
-                 model_id=self.id,
              )

          # Define base client params
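The Nebius change is a rebrand and endpoint migration (AI Studio to Token Factory) plus a new default model; existing code picks up the new endpoint automatically, and an explicit override keeps the old one. A sketch, assuming Nebius is importable from agno.models.nebius as the files table suggests:

    from agno.models.nebius import Nebius

    model = Nebius()  # now "openai/gpt-oss-20b" via https://api.tokenfactory.nebius.com/v1/

    # Pinning the previous endpoint explicitly, if still needed:
    legacy = Nebius(base_url="https://api.studio.nebius.com/v1/")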
agno/models/nvidia/nvidia.py CHANGED
@@ -1,7 +1,8 @@
- from dataclasses import dataclass, field
+ from dataclasses import dataclass
  from os import getenv
- from typing import Optional
+ from typing import Any, Dict, Optional

+ from agno.exceptions import ModelAuthenticationError
  from agno.models.openai.like import OpenAILike


@@ -22,7 +23,23 @@ class Nvidia(OpenAILike):
      name: str = "Nvidia"
      provider: str = "Nvidia"

-     api_key: Optional[str] = field(default_factory=lambda: getenv("NVIDIA_API_KEY"))
+     api_key: Optional[str] = None
      base_url: str = "https://integrate.api.nvidia.com/v1"

      supports_native_structured_outputs: bool = False
+
+     def _get_client_params(self) -> Dict[str, Any]:
+         """
+         Returns client parameters for API requests, checking for NVIDIA_API_KEY.
+
+         Returns:
+             Dict[str, Any]: A dictionary of client parameters for API requests.
+         """
+         if not self.api_key:
+             self.api_key = getenv("NVIDIA_API_KEY")
+         if not self.api_key:
+             raise ModelAuthenticationError(
+                 message="NVIDIA_API_KEY not set. Please set the NVIDIA_API_KEY environment variable.",
+                 model_name=self.name,
+             )
+         return super()._get_client_params()
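Nvidia gets the same lazy-key treatment as LlamaOpenAI: api_key is None at construction, and NVIDIA_API_KEY is only consulted (and ModelAuthenticationError only raised) when _get_client_params() runs. A sketch of the observable difference, with the import path assumed from the files table:

    from agno.models.nvidia import Nvidia

    model = Nvidia()  # constructing no longer reads NVIDIA_API_KEY

    # Only building a client triggers the check; with the variable unset,
    # ModelAuthenticationError is raised from _get_client_params().
    # model._get_client_params()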
agno/models/ollama/chat.py CHANGED
@@ -147,19 +147,26 @@ class Ollama(Model):
          cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
          return cleaned_dict

-     def _format_message(self, message: Message) -> Dict[str, Any]:
+     def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
          """
          Format a message into the format expected by Ollama.

          Args:
              message (Message): The message to format.
+             compress_tool_results: Whether to compress tool results.

          Returns:
              Dict[str, Any]: The formatted message.
          """
+         # Use compressed content for tool messages if compression is active
+         if message.role == "tool":
+             content = message.get_content(use_compressed_content=compress_tool_results)
+         else:
+             content = message.content
+
          _message: Dict[str, Any] = {
              "role": message.role,
-             "content": message.content,
+             "content": content,
          }

          if message.role == "assistant" and message.tool_calls is not None:
@@ -228,6 +235,7 @@ class Ollama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Send a chat request to the Ollama API.
@@ -241,7 +249,7 @@ class Ollama(Model):

          provider_response = self.get_client().chat(
              model=self.id.strip(),
-             messages=[self._format_message(m) for m in messages],  # type: ignore
+             messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
              **request_kwargs,
          )  # type: ignore

@@ -258,6 +266,7 @@ class Ollama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> ModelResponse:
          """
          Sends an asynchronous chat request to the Ollama API.
@@ -271,7 +280,7 @@ class Ollama(Model):

          provider_response = await self.get_async_client().chat(
              model=self.id.strip(),
-             messages=[self._format_message(m) for m in messages],  # type: ignore
+             messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
              **request_kwargs,
          )  # type: ignore

@@ -288,6 +297,7 @@ class Ollama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> Iterator[ModelResponse]:
          """
          Sends a streaming chat request to the Ollama API.
@@ -299,7 +309,7 @@ class Ollama(Model):

          for chunk in self.get_client().chat(
              model=self.id,
-             messages=[self._format_message(m) for m in messages],  # type: ignore
+             messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
              stream=True,
              **self.get_request_params(tools=tools),
          ):
@@ -315,6 +325,7 @@ class Ollama(Model):
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
          run_response: Optional[RunOutput] = None,
+         compress_tool_results: bool = False,
      ) -> AsyncIterator[ModelResponse]:
          """
          Sends an asynchronous streaming chat completion request to the Ollama API.
@@ -326,7 +337,7 @@ class Ollama(Model):

          async for chunk in await self.get_async_client().chat(
              model=self.id.strip(),
-             messages=[self._format_message(m) for m in messages],  # type: ignore
+             messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
              stream=True,
              **self.get_request_params(tools=tools),
          ):
@@ -429,8 +440,13 @@ class Ollama(Model):
          """
          metrics = Metrics()

-         metrics.input_tokens = response.get("prompt_eval_count", 0)
-         metrics.output_tokens = response.get("eval_count", 0)
+         # Safely handle None values from Ollama Cloud responses
+         input_tokens = response.get("prompt_eval_count")
+         output_tokens = response.get("eval_count")
+
+         # Default to 0 if None
+         metrics.input_tokens = input_tokens if input_tokens is not None else 0
+         metrics.output_tokens = output_tokens if output_tokens is not None else 0
          metrics.total_tokens = metrics.input_tokens + metrics.output_tokens

          return metrics
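The metrics fix above guards against Ollama Cloud returning explicit nulls, which dict.get(key, 0) does not catch — the default only applies when the key is absent, not when its value is None. In isolation:

    response = {"prompt_eval_count": None, "eval_count": 42}

    # Old code: a present-but-None key slips through the default
    response.get("prompt_eval_count", 0)  # -> None, would break total_tokens arithmetic

    # New code: fetch first, then coalesce None to 0
    input_tokens = response.get("prompt_eval_count")
    input_tokens = input_tokens if input_tokens is not None else 0  # -> 0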