agno-2.3.2-py3-none-any.whl → agno-2.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. agno/agent/agent.py +513 -185
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +66 -52
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/singlestore/singlestore.py +99 -108
  18. agno/db/sqlite/async_sqlite.py +29 -27
  19. agno/db/sqlite/sqlite.py +30 -26
  20. agno/knowledge/reader/pdf_reader.py +2 -2
  21. agno/knowledge/reader/tavily_reader.py +0 -1
  22. agno/memory/__init__.py +14 -1
  23. agno/memory/manager.py +217 -4
  24. agno/memory/strategies/__init__.py +15 -0
  25. agno/memory/strategies/base.py +67 -0
  26. agno/memory/strategies/summarize.py +196 -0
  27. agno/memory/strategies/types.py +37 -0
  28. agno/models/aimlapi/aimlapi.py +18 -0
  29. agno/models/anthropic/claude.py +87 -81
  30. agno/models/aws/bedrock.py +38 -16
  31. agno/models/aws/claude.py +97 -277
  32. agno/models/azure/ai_foundry.py +8 -4
  33. agno/models/base.py +101 -14
  34. agno/models/cerebras/cerebras.py +25 -9
  35. agno/models/cerebras/cerebras_openai.py +22 -2
  36. agno/models/cohere/chat.py +18 -6
  37. agno/models/cometapi/cometapi.py +19 -1
  38. agno/models/deepinfra/deepinfra.py +19 -1
  39. agno/models/fireworks/fireworks.py +19 -1
  40. agno/models/google/gemini.py +583 -21
  41. agno/models/groq/groq.py +23 -6
  42. agno/models/huggingface/huggingface.py +22 -7
  43. agno/models/ibm/watsonx.py +21 -7
  44. agno/models/internlm/internlm.py +19 -1
  45. agno/models/langdb/langdb.py +10 -0
  46. agno/models/litellm/chat.py +17 -7
  47. agno/models/litellm/litellm_openai.py +19 -1
  48. agno/models/message.py +19 -5
  49. agno/models/meta/llama.py +25 -5
  50. agno/models/meta/llama_openai.py +18 -0
  51. agno/models/mistral/mistral.py +13 -5
  52. agno/models/nvidia/nvidia.py +19 -1
  53. agno/models/ollama/chat.py +17 -6
  54. agno/models/openai/chat.py +22 -7
  55. agno/models/openai/responses.py +28 -10
  56. agno/models/openrouter/openrouter.py +20 -0
  57. agno/models/perplexity/perplexity.py +17 -0
  58. agno/models/requesty/requesty.py +18 -0
  59. agno/models/sambanova/sambanova.py +19 -1
  60. agno/models/siliconflow/siliconflow.py +19 -1
  61. agno/models/together/together.py +19 -1
  62. agno/models/vercel/v0.py +19 -1
  63. agno/models/vertexai/claude.py +99 -5
  64. agno/models/xai/xai.py +18 -0
  65. agno/os/interfaces/agui/router.py +1 -0
  66. agno/os/interfaces/agui/utils.py +97 -57
  67. agno/os/router.py +16 -0
  68. agno/os/routers/memory/memory.py +143 -0
  69. agno/os/routers/memory/schemas.py +26 -0
  70. agno/os/schema.py +33 -6
  71. agno/os/utils.py +134 -10
  72. agno/run/base.py +2 -1
  73. agno/run/workflow.py +1 -1
  74. agno/team/team.py +566 -219
  75. agno/tools/mcp/mcp.py +1 -1
  76. agno/utils/agent.py +119 -1
  77. agno/utils/models/ai_foundry.py +9 -2
  78. agno/utils/models/claude.py +12 -5
  79. agno/utils/models/cohere.py +9 -2
  80. agno/utils/models/llama.py +9 -2
  81. agno/utils/models/mistral.py +4 -2
  82. agno/utils/print_response/agent.py +37 -2
  83. agno/utils/print_response/team.py +52 -0
  84. agno/utils/tokens.py +41 -0
  85. agno/workflow/types.py +2 -2
  86. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
  87. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
  88. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
  89. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
  90. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py CHANGED
@@ -74,7 +74,11 @@ class Groq(Model):
         if not self.api_key:
             self.api_key = getenv("GROQ_API_KEY")
             if not self.api_key:
-                log_error("GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.")
+                raise ModelProviderError(
+                    message="GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -221,19 +225,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.
 
         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -276,6 +289,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -287,7 +301,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -316,6 +330,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +342,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -356,6 +371,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -368,7 +384,7 @@ class Groq(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -396,6 +412,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +426,7 @@ class Groq(Model):
 
         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
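
Note on the change above: a missing GROQ_API_KEY now fails fast with a ModelProviderError when client parameters are resolved (for example, the first time the client is built), instead of only emitting an error log. A minimal sketch of the new behavior, assuming the import paths agno.models.groq.Groq and agno.exceptions.ModelProviderError shown in this diff; the model id is illustrative:

    # Sketch: simulate an unset key and observe the raised error
    import os
    from agno.exceptions import ModelProviderError
    from agno.models.groq import Groq

    os.environ.pop("GROQ_API_KEY", None)  # ensure the key is not set
    try:
        Groq(id="llama-3.3-70b-versatile").get_client()  # client params are resolved here
    except ModelProviderError as exc:
        print(exc)  # GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.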
agno/models/huggingface/huggingface.py CHANGED
@@ -73,7 +73,11 @@ class HuggingFace(Model):
     def get_client_params(self) -> Dict[str, Any]:
         self.api_key = self.api_key or getenv("HF_TOKEN")
         if not self.api_key:
-            log_error("HF_TOKEN not set. Please set the HF_TOKEN environment variable.")
+            raise ModelProviderError(
+                message="HF_TOKEN not set. Please set the HF_TOKEN environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params: Dict[str, Any] = {}
         if self.api_key is not None:
@@ -191,19 +195,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content if message.content is not None else "",
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +247,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +259,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +281,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +293,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +315,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +328,7 @@ class HuggingFace(Model):
 
         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +354,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +366,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
agno/models/ibm/watsonx.py CHANGED
@@ -59,7 +59,11 @@ class WatsonX(Model):
         # Fetch API key and project ID from env if not already set
         self.api_key = self.api_key or getenv("IBM_WATSONX_API_KEY")
         if not self.api_key:
-            log_error("IBM_WATSONX_API_KEY not set. Please set the IBM_WATSONX_API_KEY environment variable.")
+            raise ModelProviderError(
+                message="IBM_WATSONX_API_KEY not set. Please set the IBM_WATSONX_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         self.project_id = self.project_id or getenv("IBM_WATSONX_PROJECT_ID")
         if not self.project_id:
@@ -129,12 +133,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +156,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")
 
-        return message.to_dict()
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict
 
     def invoke(
         self,
@@ -161,6 +171,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +182,7 @@ class WatsonX(Model):
 
         client = self.get_client()
 
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +207,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +217,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +243,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +278,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +288,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         # Get parameters for chat
         request_params = self.get_request_params(
agno/models/internlm/internlm.py CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -24,3 +25,20 @@ class InternLM(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("INTERNLM_API_KEY"))
     base_url: Optional[str] = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for INTERNLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("INTERNLM_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="INTERNLM_API_KEY not set. Please set the INTERNLM_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+        return super()._get_client_params()
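
The InternLM change above, and the LangDB and LiteLLMOpenAI changes that follow, apply the same guard: the OpenAILike subclass resolves its API key inside _get_client_params(), raises ModelProviderError if it is still unset, and otherwise defers to super()._get_client_params(). A rough sketch of that pattern for a hypothetical provider; ExampleChat, EXAMPLE_API_KEY, and the base_url are illustrative and not part of the package:

    from dataclasses import dataclass, field
    from os import getenv
    from typing import Any, Dict, Optional

    from agno.exceptions import ModelProviderError
    from agno.models.openai.like import OpenAILike


    @dataclass
    class ExampleChat(OpenAILike):
        # Hypothetical OpenAI-compatible provider used only to illustrate the shared pattern
        id: str = "example-chat-1"
        name: str = "ExampleChat"
        api_key: Optional[str] = field(default_factory=lambda: getenv("EXAMPLE_API_KEY"))
        base_url: Optional[str] = "https://api.example.com/v1"

        def _get_client_params(self) -> Dict[str, Any]:
            if not self.api_key:
                self.api_key = getenv("EXAMPLE_API_KEY")
                if not self.api_key:
                    raise ModelProviderError(
                        message="EXAMPLE_API_KEY not set. Please set the EXAMPLE_API_KEY environment variable.",
                        model_name=self.name,
                        model_id=self.id,
                    )
            return super()._get_client_params()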
agno/models/langdb/langdb.py CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -32,6 +33,15 @@ class LangDB(OpenAILike):
     default_headers: Optional[dict] = None
 
     def _get_client_params(self) -> Dict[str, Any]:
+        if not self.api_key:
+            self.api_key = getenv("LANGDB_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="LANGDB_API_KEY not set. Please set the LANGDB_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+
         if not self.project_id:
             raise ValueError("LANGDB_PROJECT_ID not set in the environment")
 
agno/models/litellm/chat.py CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client
 
-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
         for m in messages:
-            msg = {"role": m.role, "content": m.content if m.content is not None else ""}
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}
 
             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
                 if isinstance(msg["content"], str):
                     content_list = [{"type": "text", "text": msg["content"]}]
                 else:
-                    content_list = msg["content"]
+                    content_list = msg["content"] if isinstance(msg["content"], list) else []
                 for file in m.files:
                     file_part = _format_file_for_message(file)
                     if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
@@ -236,10 +244,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
agno/models/litellm/litellm_openai.py CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -23,3 +24,20 @@ class LiteLLMOpenAI(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("LITELLM_API_KEY"))
     base_url: str = "http://0.0.0.0:4000"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for LITELLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("LITELLM_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="LITELLM_API_KEY not set. Please set the LITELLM_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+        return super()._get_client_params()
agno/models/message.py CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
     role: str
     # The contents of the message.
     content: Optional[Union[List[Any], str]] = None
+    # Compressed content of the message
+    compressed_content: Optional[str] = None
+
     # An optional name for the participant.
     # Provides the model information to differentiate between participants of the same role.
     name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
             return json.dumps(self.content)
         return ""
 
+    def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+        """Return tool result content to send to API"""
+        if use_compressed_content and self.compressed_content is not None:
+            return self.compressed_content
+        return self.content
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
             "content": self.content,
             "reasoning_content": self.reasoning_content,
             "from_history": self.from_history,
+            "compressed_content": self.compressed_content,
             "stop_after_tool_call": self.stop_after_tool_call,
             "role": self.role,
             "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
             "created_at": self.created_at,
         }
 
-    def log(self, metrics: bool = True, level: Optional[str] = None):
+    def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
         """Log the message to the console
 
        Args:
            metrics (bool): Whether to log the metrics.
            level (str): The level to log the message at. One of debug, info, warning, or error.
                Defaults to debug.
+           use_compressed_content (bool): Whether to use compressed content.
        """
         _logger = log_debug
         if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
         if self.reasoning_content:
             _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
         if self.content:
-            if isinstance(self.content, str) or isinstance(self.content, list):
-                _logger(self.content)
-            elif isinstance(self.content, dict):
-                _logger(json.dumps(self.content, indent=2))
+            if use_compressed_content and self.compressed_content:
+                _logger("Compressed content:\n" + self.compressed_content)
+            else:
+                if isinstance(self.content, str) or isinstance(self.content, list):
+                    _logger(self.content)
+                elif isinstance(self.content, dict):
+                    _logger(json.dumps(self.content, indent=2))
         if self.tool_calls:
             tool_calls_list = ["Tool Calls:"]
             for tool_call in self.tool_calls:
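
Taken together, the Message changes add an optional compressed_content field, persist it in to_dict(), and expose get_content(), which returns the compressed text only when it is both requested and present. A minimal sketch of that accessor, following the logic shown above:

    from agno.models.message import Message

    tool_msg = Message(
        role="tool",
        content="...long raw tool output...",
        compressed_content="short summary of the tool output",
    )

    # Compression requested and available -> the compressed text is returned
    assert tool_msg.get_content(use_compressed_content=True) == "short summary of the tool output"

    # Compression not requested, or no compressed_content stored -> the original content is returned
    assert tool_msg.get_content() == "...long raw tool output..."
    assert Message(role="tool", content="raw").get_content(use_compressed_content=True) == "raw"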
agno/models/meta/llama.py CHANGED
@@ -74,7 +74,11 @@ class Llama(Model):
         if not self.api_key:
             self.api_key = getenv("LLAMA_API_KEY")
             if not self.api_key:
-                log_error("LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.")
+                raise ModelProviderError(
+                    message="LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -217,6 +221,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Llama API.
@@ -225,7 +230,10 @@ class Llama(Model):
 
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )
 
@@ -242,6 +250,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +262,10 @@ class Llama(Model):
 
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )
 
@@ -270,6 +282,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Llama API.
@@ -282,7 +295,10 @@ class Llama(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format),
         ):
@@ -302,6 +318,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
    ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +331,10 @@ class Llama(Model):
         try:
             async for chunk in await self.get_async_client().chat.completions.create(
                 model=self.id,
-                messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+                messages=[
+                    format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                    for m in messages
+                ],
                 stream=True,
                 **self.get_request_params(tools=tools, response_format=response_format),
             ):
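
Across the provider diffs above, compress_tool_results is threaded from invoke/ainvoke and their streaming variants into each provider's message-formatting helper, which swaps in compressed_content only for messages with role "tool". A small sketch of that formatting step, assuming the Groq.format_message signature from this diff; the api_key and model id are placeholders:

    from agno.models.groq import Groq
    from agno.models.message import Message

    model = Groq(id="llama-3.3-70b-versatile", api_key="placeholder-key")

    tool_msg = Message(
        role="tool",
        tool_call_id="call_1",
        content="...long raw tool output...",
        compressed_content="short summary",
    )

    # With compression active, the compressed text is what gets sent to the provider
    assert model.format_message(tool_msg, compress_tool_results=True)["content"] == "short summary"

    # Non-tool messages are never compressed
    user_msg = Message(role="user", content="hello")
    assert model.format_message(user_msg, compress_tool_results=True)["content"] == "hello"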