agno 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. agno/agent/agent.py +514 -186
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +61 -51
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/schemas/memory.py +7 -5
  18. agno/db/singlestore/singlestore.py +99 -108
  19. agno/db/sqlite/async_sqlite.py +32 -30
  20. agno/db/sqlite/sqlite.py +34 -30
  21. agno/knowledge/reader/pdf_reader.py +2 -2
  22. agno/knowledge/reader/tavily_reader.py +0 -1
  23. agno/memory/__init__.py +14 -1
  24. agno/memory/manager.py +223 -8
  25. agno/memory/strategies/__init__.py +15 -0
  26. agno/memory/strategies/base.py +67 -0
  27. agno/memory/strategies/summarize.py +196 -0
  28. agno/memory/strategies/types.py +37 -0
  29. agno/models/anthropic/claude.py +84 -80
  30. agno/models/aws/bedrock.py +38 -16
  31. agno/models/aws/claude.py +97 -277
  32. agno/models/azure/ai_foundry.py +8 -4
  33. agno/models/base.py +101 -14
  34. agno/models/cerebras/cerebras.py +18 -7
  35. agno/models/cerebras/cerebras_openai.py +4 -2
  36. agno/models/cohere/chat.py +8 -4
  37. agno/models/google/gemini.py +578 -20
  38. agno/models/groq/groq.py +18 -5
  39. agno/models/huggingface/huggingface.py +17 -6
  40. agno/models/ibm/watsonx.py +16 -6
  41. agno/models/litellm/chat.py +17 -7
  42. agno/models/message.py +19 -5
  43. agno/models/meta/llama.py +20 -4
  44. agno/models/mistral/mistral.py +8 -4
  45. agno/models/ollama/chat.py +17 -6
  46. agno/models/openai/chat.py +17 -6
  47. agno/models/openai/responses.py +23 -9
  48. agno/models/vertexai/claude.py +99 -5
  49. agno/os/interfaces/agui/router.py +1 -0
  50. agno/os/interfaces/agui/utils.py +97 -57
  51. agno/os/router.py +16 -1
  52. agno/os/routers/memory/memory.py +146 -0
  53. agno/os/routers/memory/schemas.py +26 -0
  54. agno/os/schema.py +21 -6
  55. agno/os/utils.py +134 -10
  56. agno/run/base.py +2 -1
  57. agno/run/workflow.py +1 -1
  58. agno/team/team.py +571 -225
  59. agno/tools/mcp/mcp.py +1 -1
  60. agno/utils/agent.py +119 -1
  61. agno/utils/dttm.py +33 -0
  62. agno/utils/models/ai_foundry.py +9 -2
  63. agno/utils/models/claude.py +12 -5
  64. agno/utils/models/cohere.py +9 -2
  65. agno/utils/models/llama.py +9 -2
  66. agno/utils/models/mistral.py +4 -2
  67. agno/utils/print_response/agent.py +37 -2
  68. agno/utils/print_response/team.py +52 -0
  69. agno/utils/tokens.py +41 -0
  70. agno/workflow/types.py +2 -2
  71. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
  72. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/RECORD +75 -68
  73. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
  74. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
  75. {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py CHANGED
@@ -221,19 +221,28 @@ class Groq(Model):
  self,
  message: Message,
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+ compress_tool_results: bool = False,
  ) -> Dict[str, Any]:
  """
  Format a message into the format expected by Groq.

  Args:
  message (Message): The message to format.
+ response_format: Optional response format specification.
+ compress_tool_results: Whether to compress tool results.

  Returns:
  Dict[str, Any]: The formatted message.
  """
+ # Use compressed content for tool messages if compression is active
+ if message.role == "tool":
+ content = message.get_content(use_compressed_content=compress_tool_results)
+ else:
+ content = message.content
+
  message_dict: Dict[str, Any] = {
  "role": message.role,
- "content": message.content,
+ "content": content,
  "name": message.name,
  "tool_call_id": message.tool_call_id,
  "tool_calls": message.tool_calls,
@@ -276,6 +285,7 @@ class Groq(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat completion request to the Groq API.
@@ -287,7 +297,7 @@ class Groq(Model):
  assistant_message.metrics.start_timer()
  provider_response = self.get_client().chat.completions.create(
  model=self.id,
- messages=[self.format_message(m) for m in messages], # type: ignore
+ messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
  **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
  )
  assistant_message.metrics.stop_timer()
@@ -316,6 +326,7 @@ class Groq(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +338,7 @@ class Groq(Model):
  assistant_message.metrics.start_timer()
  response = await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[self.format_message(m) for m in messages], # type: ignore
+ messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
  **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
  )
  assistant_message.metrics.stop_timer()
@@ -356,6 +367,7 @@ class Groq(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Send a streaming chat completion request to the Groq API.
@@ -368,7 +380,7 @@ class Groq(Model):

  for chunk in self.get_client().chat.completions.create(
  model=self.id,
- messages=[self.format_message(m) for m in messages], # type: ignore
+ messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
  stream=True,
  **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
  ):
@@ -396,6 +408,7 @@ class Groq(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
  Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +422,7 @@ class Groq(Model):

  async_stream = await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[self.format_message(m) for m in messages], # type: ignore
+ messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
  stream=True,
  **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
  )
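Every provider touched in this release applies the same rule when serializing a tool message: route the content through Message.get_content() so the compressed form is only used when compression is active. A minimal sketch of that rule, using only the signatures visible in the hunks above; the standalone helper name is illustrative and not part of the package:

# Sketch only: `resolve_tool_content` is a hypothetical helper, not an agno API.
from agno.models.message import Message

def resolve_tool_content(message: Message, compress_tool_results: bool = False):
    # Tool results may carry a shorter `compressed_content`; other roles keep `content` unchanged.
    if message.role == "tool":
        return message.get_content(use_compressed_content=compress_tool_results)
    return message.content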
agno/models/huggingface/huggingface.py CHANGED
@@ -191,19 +191,26 @@ class HuggingFace(Model):
  cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
  return cleaned_dict

- def _format_message(self, message: Message) -> Dict[str, Any]:
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
  """
  Format a message into the format expected by HuggingFace.

  Args:
  message (Message): The message to format.
+ compress_tool_results: Whether to compress tool results.

  Returns:
  Dict[str, Any]: The formatted message.
  """
+ # Use compressed content for tool messages if compression is active
+ if message.role == "tool":
+ content = message.get_content(use_compressed_content=compress_tool_results)
+ else:
+ content = message.content if message.content is not None else ""
+
  message_dict: Dict[str, Any] = {
  "role": message.role,
- "content": message.content if message.content is not None else "",
+ "content": content,
  "name": message.name or message.tool_name,
  "tool_call_id": message.tool_call_id,
  "tool_calls": message.tool_calls,
@@ -236,6 +243,7 @@ class HuggingFace(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +255,7 @@ class HuggingFace(Model):
  assistant_message.metrics.start_timer()
  provider_response = self.get_client().chat.completions.create(
  model=self.id,
- messages=[self._format_message(m) for m in messages],
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
  **self.get_request_params(tools=tools, tool_choice=tool_choice),
  )
  assistant_message.metrics.stop_timer()
@@ -269,6 +277,7 @@ class HuggingFace(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +289,7 @@ class HuggingFace(Model):
  assistant_message.metrics.start_timer()
  provider_response = await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[self._format_message(m) for m in messages],
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
  **self.get_request_params(tools=tools, tool_choice=tool_choice),
  )
  assistant_message.metrics.stop_timer()
@@ -302,6 +311,7 @@ class HuggingFace(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +324,7 @@ class HuggingFace(Model):

  stream = self.get_client().chat.completions.create(
  model=self.id,
- messages=[self._format_message(m) for m in messages],
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
  stream=True,
  stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
  **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +350,7 @@ class HuggingFace(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[Any]:
  """
  Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +362,7 @@ class HuggingFace(Model):
  assistant_message.metrics.start_timer()
  provider_response = await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[self._format_message(m) for m in messages],
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
  stream=True,
  stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
  **self.get_request_params(tools=tools, tool_choice=tool_choice),
agno/models/ibm/watsonx.py CHANGED
@@ -129,12 +129,13 @@ class WatsonX(Model):
  log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
  return request_params

- def _format_message(self, message: Message) -> Dict[str, Any]:
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
  """
  Format a message into the format expected by WatsonX.

  Args:
  message (Message): The message to format.
+ compress_tool_results: Whether to compress tool results.

  Returns:
  Dict[str, Any]: The formatted message.
@@ -151,7 +152,12 @@ class WatsonX(Model):
  if message.videos is not None and len(message.videos) > 0:
  log_warning("Video input is currently unsupported.")

- return message.to_dict()
+ message_dict = message.to_dict()
+
+ # Use compressed content for tool messages if compression is active
+ if message.role == "tool" and compress_tool_results:
+ message_dict["content"] = message.get_content(use_compressed_content=True)
+ return message_dict

  def invoke(
  self,
@@ -161,6 +167,7 @@ class WatsonX(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat completion request to the WatsonX API.
@@ -171,7 +178,7 @@ class WatsonX(Model):

  client = self.get_client()

- formatted_messages = [self._format_message(m) for m in messages]
+ formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
  request_params = self.get_request_params(
  response_format=response_format, tools=tools, tool_choice=tool_choice
  )
@@ -196,6 +203,7 @@ class WatsonX(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Any:
  """
  Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +213,7 @@ class WatsonX(Model):
  run_response.metrics.set_time_to_first_token()

  client = self.get_client()
- formatted_messages = [self._format_message(m) for m in messages]
+ formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

  request_params = self.get_request_params(
  response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +239,14 @@ class WatsonX(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Send a streaming chat completion request to the WatsonX API.
  """
  try:
  client = self.get_client()
- formatted_messages = [self._format_message(m) for m in messages]
+ formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

  request_params = self.get_request_params(
  response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +274,7 @@ class WatsonX(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
  Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +284,7 @@ class WatsonX(Model):
  run_response.metrics.set_time_to_first_token()

  client = self.get_client()
- formatted_messages = [self._format_message(m) for m in messages]
+ formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

  # Get parameters for chat
  request_params = self.get_request_params(
agno/models/litellm/chat.py CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
  self.client = litellm
  return self.client

- def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+ def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
  """Format messages for LiteLLM API."""
  formatted_messages = []
  for m in messages:
- msg = {"role": m.role, "content": m.content if m.content is not None else ""}
+ # Use compressed content for tool messages if compression is active
+ if m.role == "tool":
+ content = m.get_content(use_compressed_content=compress_tool_results)
+ else:
+ content = m.content if m.content is not None else ""
+
+ msg = {"role": m.role, "content": content}

  # Handle media
  if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
  if isinstance(msg["content"], str):
  content_list = [{"type": "text", "text": msg["content"]}]
  else:
- content_list = msg["content"]
+ content_list = msg["content"] if isinstance(msg["content"], list) else []
  for file in m.files:
  file_part = _format_file_for_message(file)
  if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """Sends a chat completion request to the LiteLLM API."""
  completion_kwargs = self.get_request_params(tools=tools)
- completion_kwargs["messages"] = self._format_messages(messages)
+ completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

  if run_response and run_response.metrics:
  run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """Sends a streaming chat completion request to the LiteLLM API."""
  completion_kwargs = self.get_request_params(tools=tools)
- completion_kwargs["messages"] = self._format_messages(messages)
+ completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
  completion_kwargs["stream"] = True
  completion_kwargs["stream_options"] = {"include_usage": True}

@@ -236,10 +244,11 @@ class LiteLLM(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """Sends an asynchronous chat completion request to the LiteLLM API."""
  completion_kwargs = self.get_request_params(tools=tools)
- completion_kwargs["messages"] = self._format_messages(messages)
+ completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

  if run_response and run_response.metrics:
  run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """Sends an asynchronous streaming chat request to the LiteLLM API."""
  completion_kwargs = self.get_request_params(tools=tools)
- completion_kwargs["messages"] = self._format_messages(messages)
+ completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
  completion_kwargs["stream"] = True
  completion_kwargs["stream_options"] = {"include_usage": True}
agno/models/message.py CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
  role: str
  # The contents of the message.
  content: Optional[Union[List[Any], str]] = None
+ # Compressed content of the message
+ compressed_content: Optional[str] = None
+
  # An optional name for the participant.
  # Provides the model information to differentiate between participants of the same role.
  name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
  return json.dumps(self.content)
  return ""

+ def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+ """Return tool result content to send to API"""
+ if use_compressed_content and self.compressed_content is not None:
+ return self.compressed_content
+ return self.content
+
  @classmethod
  def from_dict(cls, data: Dict[str, Any]) -> "Message":
  # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
  "content": self.content,
  "reasoning_content": self.reasoning_content,
  "from_history": self.from_history,
+ "compressed_content": self.compressed_content,
  "stop_after_tool_call": self.stop_after_tool_call,
  "role": self.role,
  "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
  "created_at": self.created_at,
  }

- def log(self, metrics: bool = True, level: Optional[str] = None):
+ def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
  """Log the message to the console

  Args:
  metrics (bool): Whether to log the metrics.
  level (str): The level to log the message at. One of debug, info, warning, or error.
  Defaults to debug.
+ use_compressed_content (bool): Whether to use compressed content.
  """
  _logger = log_debug
  if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
  if self.reasoning_content:
  _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
  if self.content:
- if isinstance(self.content, str) or isinstance(self.content, list):
- _logger(self.content)
- elif isinstance(self.content, dict):
- _logger(json.dumps(self.content, indent=2))
+ if use_compressed_content and self.compressed_content:
+ _logger("Compressed content:\n" + self.compressed_content)
+ else:
+ if isinstance(self.content, str) or isinstance(self.content, list):
+ _logger(self.content)
+ elif isinstance(self.content, dict):
+ _logger(json.dumps(self.content, indent=2))
  if self.tool_calls:
  tool_calls_list = ["Tool Calls:"]
  for tool_call in self.tool_calls:
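Based on the hunks above, get_content() substitutes compressed_content only when the flag is set and a compressed value actually exists; otherwise it falls back to the original content, so passing the flag is always safe. A small sketch of that behaviour, with illustrative values:

from agno.models.message import Message

# A tool result that also carries a compressed form (values are illustrative)
msg = Message(
    role="tool",
    content="...full multi-kilobyte tool output...",
    compressed_content="3 rows returned; top result: AAPL",
)

msg.get_content()                               # -> the original content
msg.get_content(use_compressed_content=True)    # -> "3 rows returned; top result: AAPL"

# Without a compressed form, the flag is a no-op
plain = Message(role="tool", content="raw output")
plain.get_content(use_compressed_content=True)  # -> "raw output"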
agno/models/meta/llama.py CHANGED
@@ -217,6 +217,7 @@ class Llama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat completion request to the Llama API.
@@ -225,7 +226,10 @@ class Llama(Model):

  provider_response = self.get_client().chat.completions.create(
  model=self.id,
- messages=[format_message(m, tool_calls=bool(tools)) for m in messages], # type: ignore
+ messages=[
+ format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results) # type: ignore
+ for m in messages
+ ],
  **self.get_request_params(tools=tools, response_format=response_format),
  )

@@ -242,6 +246,7 @@ class Llama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +258,10 @@ class Llama(Model):

  provider_response = await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[format_message(m, tool_calls=bool(tools)) for m in messages], # type: ignore
+ messages=[
+ format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results) # type: ignore
+ for m in messages
+ ],
  **self.get_request_params(tools=tools, response_format=response_format),
  )

@@ -270,6 +278,7 @@ class Llama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Send a streaming chat completion request to the Llama API.
@@ -282,7 +291,10 @@ class Llama(Model):

  for chunk in self.get_client().chat.completions.create(
  model=self.id,
- messages=[format_message(m, tool_calls=bool(tools)) for m in messages], # type: ignore
+ messages=[
+ format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results) # type: ignore
+ for m in messages
+ ],
  stream=True,
  **self.get_request_params(tools=tools, response_format=response_format),
  ):
@@ -302,6 +314,7 @@ class Llama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
  Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +327,10 @@ class Llama(Model):
  try:
  async for chunk in await self.get_async_client().chat.completions.create(
  model=self.id,
- messages=[format_message(m, tool_calls=bool(tools)) for m in messages], # type: ignore
+ messages=[
+ format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results) # type: ignore
+ for m in messages
+ ],
  stream=True,
  **self.get_request_params(tools=tools, response_format=response_format),
  ):
agno/models/mistral/mistral.py CHANGED
@@ -174,11 +174,12 @@ class MistralChat(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat completion request to the Mistral model.
  """
- mistral_messages = format_messages(messages)
+ mistral_messages = format_messages(messages, compress_tool_results)
  try:
  response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
  if (
@@ -229,11 +230,12 @@ class MistralChat(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Stream the response from the Mistral model.
  """
- mistral_messages = format_messages(messages)
+ mistral_messages = format_messages(messages, compress_tool_results)

  if run_response and run_response.metrics:
  run_response.metrics.set_time_to_first_token()
@@ -265,11 +267,12 @@ class MistralChat(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send an asynchronous chat completion request to the Mistral API.
  """
- mistral_messages = format_messages(messages)
+ mistral_messages = format_messages(messages, compress_tool_results)
  try:
  response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
  if (
@@ -316,11 +319,12 @@ class MistralChat(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
  Stream an asynchronous response from the Mistral API.
  """
- mistral_messages = format_messages(messages)
+ mistral_messages = format_messages(messages, compress_tool_results)
  try:
  if run_response and run_response.metrics:
  run_response.metrics.set_time_to_first_token()
agno/models/ollama/chat.py CHANGED
@@ -147,19 +147,26 @@ class Ollama(Model):
  cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
  return cleaned_dict

- def _format_message(self, message: Message) -> Dict[str, Any]:
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
  """
  Format a message into the format expected by Ollama.

  Args:
  message (Message): The message to format.
+ compress_tool_results: Whether to compress tool results.

  Returns:
  Dict[str, Any]: The formatted message.
  """
+ # Use compressed content for tool messages if compression is active
+ if message.role == "tool":
+ content = message.get_content(use_compressed_content=compress_tool_results)
+ else:
+ content = message.content
+
  _message: Dict[str, Any] = {
  "role": message.role,
- "content": message.content,
+ "content": content,
  }

  if message.role == "assistant" and message.tool_calls is not None:
@@ -228,6 +235,7 @@ class Ollama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Send a chat request to the Ollama API.
@@ -241,7 +249,7 @@ class Ollama(Model):

  provider_response = self.get_client().chat(
  model=self.id.strip(),
- messages=[self._format_message(m) for m in messages], # type: ignore
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
  **request_kwargs,
  ) # type: ignore

@@ -258,6 +266,7 @@ class Ollama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> ModelResponse:
  """
  Sends an asynchronous chat request to the Ollama API.
@@ -271,7 +280,7 @@ class Ollama(Model):

  provider_response = await self.get_async_client().chat(
  model=self.id.strip(),
- messages=[self._format_message(m) for m in messages], # type: ignore
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
  **request_kwargs,
  ) # type: ignore

@@ -288,6 +297,7 @@ class Ollama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
  Sends a streaming chat request to the Ollama API.
@@ -299,7 +309,7 @@ class Ollama(Model):

  for chunk in self.get_client().chat(
  model=self.id,
- messages=[self._format_message(m) for m in messages], # type: ignore
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
  stream=True,
  **self.get_request_params(tools=tools),
  ):
@@ -315,6 +325,7 @@ class Ollama(Model):
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  run_response: Optional[RunOutput] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
  Sends an asynchronous streaming chat completion request to the Ollama API.
@@ -326,7 +337,7 @@ class Ollama(Model):

  async for chunk in await self.get_async_client().chat(
  model=self.id.strip(),
- messages=[self._format_message(m) for m in messages], # type: ignore
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
  stream=True,
  **self.get_request_params(tools=tools),
  ):
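At the provider level the new flag is consulted only for tool-role messages, so the default call path is unchanged. A hedged usage sketch against the Ollama formatting method shown above; the model id and tool_call_id are illustrative, and _format_message is an internal method, invoked here only to show the effect of the flag:

from agno.models.message import Message
from agno.models.ollama.chat import Ollama  # module path taken from the hunks above

model = Ollama(id="llama3.1")  # illustrative model id

tool_msg = Message(
    role="tool",
    tool_call_id="call_1",  # illustrative
    content="...long raw tool output...",
    compressed_content="summary of the output",
)

# With the flag set, the payload sent to Ollama carries the compressed text
model._format_message(tool_msg, compress_tool_results=True)["content"]
# -> "summary of the output"

# Default behaviour is unchanged: the original content is sent
model._format_message(tool_msg)["content"]
# -> "...long raw tool output..."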