agno 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +514 -186
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +61 -51
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/schemas/memory.py +7 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +32 -30
- agno/db/sqlite/sqlite.py +34 -30
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/anthropic/claude.py +84 -80
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +18 -7
- agno/models/cerebras/cerebras_openai.py +4 -2
- agno/models/cohere/chat.py +8 -4
- agno/models/google/gemini.py +578 -20
- agno/models/groq/groq.py +18 -5
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/litellm/chat.py +17 -7
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +20 -4
- agno/models/mistral/mistral.py +8 -4
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +17 -6
- agno/models/openai/responses.py +23 -9
- agno/models/vertexai/claude.py +99 -5
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -1
- agno/os/routers/memory/memory.py +146 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +21 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +571 -225
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/dttm.py +33 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/RECORD +75 -68
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py
CHANGED
@@ -221,19 +221,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.

         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -276,6 +285,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -287,7 +297,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -316,6 +326,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +338,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -356,6 +367,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -368,7 +380,7 @@ class Groq(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -396,6 +408,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +422,7 @@ class Groq(Model):

         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
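The change above is the template every provider in this release follows: a tool message may carry a compressed_content alongside its original output, and the formatter picks one of the two just before the payload is built. A minimal, self-contained sketch of that selection logic (illustrative names, not the actual agno classes):

from dataclasses import dataclass
from typing import Optional


@dataclass
class ToyMessage:
    """Stand-in for agno's Message: a role, the raw content, an optional compressed form."""
    role: str
    content: Optional[str] = None
    compressed_content: Optional[str] = None

    def get_content(self, use_compressed_content: bool = False) -> Optional[str]:
        # Prefer the compressed form only when it was requested and actually exists
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content


def format_message(message: ToyMessage, compress_tool_results: bool = False) -> dict:
    # Only tool results are ever swapped for their compressed form
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content
    return {"role": message.role, "content": content}


tool_msg = ToyMessage(role="tool", content="3,000 lines of raw JSON ...", compressed_content="3 rows matched")
assert format_message(tool_msg)["content"] == "3,000 lines of raw JSON ..."
assert format_message(tool_msg, compress_tool_results=True)["content"] == "3 rows matched"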
agno/models/huggingface/huggingface.py
CHANGED
@@ -191,19 +191,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content if message.content is not None else "",
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +243,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +255,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +277,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +289,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +311,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +324,7 @@ class HuggingFace(Model):

         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +350,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +362,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
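One HuggingFace-specific detail worth noting: unlike the Groq formatter, non-tool messages with content=None are sent as an empty string, presumably because the Hub client expects a string there. A short sketch of that fallback (a hypothetical helper, same idea as the diff):

from typing import Optional


def select_hf_content(
    role: str,
    content: Optional[str],
    compressed_content: Optional[str] = None,
    compress_tool_results: bool = False,
) -> Optional[str]:
    """Sketch: compressed tool output when enabled, raw content otherwise, None coerced to ""."""
    if role == "tool":
        if compress_tool_results and compressed_content is not None:
            return compressed_content
        return content
    return content if content is not None else ""


assert select_hf_content("assistant", None) == ""
assert select_hf_content("tool", "full stack trace ...", "IndexError at step 3", compress_tool_results=True) == "IndexError at step 3"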
agno/models/ibm/watsonx.py
CHANGED
@@ -129,12 +129,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +152,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")

-        return message.to_dict()
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict

     def invoke(
         self,
@@ -161,6 +167,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +178,7 @@ class WatsonX(Model):

         client = self.get_client()

-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +203,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +213,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +239,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +274,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +284,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         # Get parameters for chat
         request_params = self.get_request_params(
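WatsonX is the one provider here that does not rebuild the message dict field by field: it serializes the whole message first and only then overrides the content key when tool-result compression is active, so every other field keeps whatever to_dict() produced. A rough sketch of that shape (duck-typed, not the agno implementation):

from typing import Any, Dict


def format_watsonx_style(message, compress_tool_results: bool = False) -> Dict[str, Any]:
    """Sketch: serialize everything, then patch the content for compressed tool results."""
    # Stand-in for message.to_dict(); the real call returns the fully serialized message.
    message_dict: Dict[str, Any] = dict(message.to_dict())

    if message.role == "tool" and compress_tool_results:
        # get_content falls back to the raw content when no compressed form exists,
        # so this override is safe even for uncompressed tool results.
        message_dict["content"] = message.get_content(use_compressed_content=True)
    return message_dict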
agno/models/litellm/chat.py
CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client

-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
         for m in messages:
-            msg = {"role": m.role, "content": m.content if m.content is not None else ""}
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}

             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
                 if isinstance(msg["content"], str):
                     content_list = [{"type": "text", "text": msg["content"]}]
                 else:
-                    content_list = msg["content"]
+                    content_list = msg["content"] if isinstance(msg["content"], list) else []
                 for file in m.files:
                     file_part = _format_file_for_message(file)
                     if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}

@@ -236,10 +244,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}

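Beyond threading the flag into _format_messages, the LiteLLM diff also hardens the media path: content_list now falls back to an empty list instead of reusing msg["content"] unchecked, so appending image/file parts can no longer fail when the content is None or some other non-list value. A small self-contained sketch of that guard:

from typing import Any, Dict, List, Optional, Union


def build_content_list(content: Optional[Union[str, list]], file_parts: List[Dict[str, Any]]) -> list:
    """Sketch of the guarded media handling: always end up with a list of content parts."""
    if isinstance(content, str):
        content_list: list = [{"type": "text", "text": content}]
    else:
        # Pre-2.3.3 this branch reused `content` directly; a None here would break
        # the append below, hence the isinstance guard.
        content_list = content if isinstance(content, list) else []
    for part in file_parts:
        if part:
            content_list.append(part)
    return content_list


assert build_content_list(None, [{"type": "file", "file_id": "f-123"}]) == [{"type": "file", "file_id": "f-123"}]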
agno/models/message.py
CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
     role: str
     # The contents of the message.
     content: Optional[Union[List[Any], str]] = None
+    # Compressed content of the message
+    compressed_content: Optional[str] = None
+
     # An optional name for the participant.
     # Provides the model information to differentiate between participants of the same role.
     name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
             return json.dumps(self.content)
         return ""

+    def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+        """Return tool result content to send to API"""
+        if use_compressed_content and self.compressed_content is not None:
+            return self.compressed_content
+        return self.content
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
             "content": self.content,
             "reasoning_content": self.reasoning_content,
             "from_history": self.from_history,
+            "compressed_content": self.compressed_content,
             "stop_after_tool_call": self.stop_after_tool_call,
             "role": self.role,
             "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
             "created_at": self.created_at,
         }

-    def log(self, metrics: bool = True, level: Optional[str] = None):
+    def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
         """Log the message to the console

         Args:
             metrics (bool): Whether to log the metrics.
             level (str): The level to log the message at. One of debug, info, warning, or error.
                 Defaults to debug.
+            use_compressed_content (bool): Whether to use compressed content.
         """
         _logger = log_debug
         if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
         if self.reasoning_content:
             _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
         if self.content:
-            if isinstance(self.content, str) or isinstance(self.content, list):
-                _logger(self.content)
-            elif isinstance(self.content, dict):
-                _logger(json.dumps(self.content, indent=2))
+            if use_compressed_content and self.compressed_content:
+                _logger("Compressed content:\n" + self.compressed_content)
+            else:
+                if isinstance(self.content, str) or isinstance(self.content, list):
+                    _logger(self.content)
+                elif isinstance(self.content, dict):
+                    _logger(json.dumps(self.content, indent=2))
         if self.tool_calls:
             tool_calls_list = ["Tool Calls:"]
             for tool_call in self.tool_calls:
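The Message change is what the provider diffs above build on: a new compressed_content field, a get_content() accessor that prefers it only on request, and dict serialization plus log() that are aware of it. Assuming the class is importable as agno.models.message.Message (the path shown in this diff) and that role remains the only required field, usage looks roughly like this:

from agno.models.message import Message

# A tool result with a compressed summary attached (values are illustrative)
msg = Message(
    role="tool",
    content="[{'id': 1, 'status': 'ok'}, ...] " * 500,      # large raw tool output
    compressed_content="500 rows returned, all status=ok",   # short form
)

# Default behaviour is unchanged: the raw content is returned
assert msg.get_content() == msg.content

# With compression requested, the short form wins when it exists
assert msg.get_content(use_compressed_content=True) == "500 rows returned, all status=ok"

# Messages without a compressed form silently fall back to the raw content
plain = Message(role="tool", content="small result")
assert plain.get_content(use_compressed_content=True) == "small result"

The serialization hunk above also writes compressed_content into the message dict, so the short form survives round-trips through whatever consumes that dict.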
agno/models/meta/llama.py
CHANGED
@@ -217,6 +217,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Llama API.
@@ -225,7 +226,10 @@ class Llama(Model):

         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -242,6 +246,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +258,10 @@ class Llama(Model):

         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -270,6 +278,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Llama API.
@@ -282,7 +291,10 @@ class Llama(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format),
         ):
@@ -302,6 +314,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +327,10 @@ class Llama(Model):
         try:
             async for chunk in await self.get_async_client().chat.completions.create(
                 model=self.id,
-                messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+                messages=[
+                    format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                    for m in messages
+                ],
                 stream=True,
                 **self.get_request_params(tools=tools, response_format=response_format),
             ):
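Llama is the odd one out in that formatting lives in a standalone format_message helper rather than a method (agno/utils/models/llama.py also changes by +9 -2 in the file list above), and the call sites now pass compress_tool_results next to the existing tool_calls switch. A sketch of how such a helper could look; the internals here are an assumption, only the call signature comes from the diff:

from typing import Any, Dict, List


def format_message(message, tool_calls: bool = False, compress_tool_results: bool = False) -> Dict[str, Any]:
    """Sketch of a standalone formatter matching the call signature in the diff."""
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content
    formatted: Dict[str, Any] = {"role": message.role, "content": content}
    # Assumed handling: only attach tool calls when the run actually supplied tools
    if tool_calls and getattr(message, "tool_calls", None):
        formatted["tool_calls"] = message.tool_calls
    return formatted


def build_payload(messages: List[Any], tools=None, compress_tool_results: bool = False) -> List[Dict[str, Any]]:
    # Mirrors the list comprehension used by Llama.invoke() and its async/stream variants
    return [
        format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)
        for m in messages
    ]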
agno/models/mistral/mistral.py
CHANGED
@@ -174,11 +174,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Mistral model.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
             if (
@@ -229,11 +230,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Stream the response from the Mistral model.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -265,11 +267,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send an asynchronous chat completion request to the Mistral API.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
             if (
@@ -316,11 +319,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Stream an asynchronous response from the Mistral API.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             if run_response and run_response.metrics:
                 run_response.metrics.set_time_to_first_token()
agno/models/ollama/chat.py
CHANGED
@@ -147,19 +147,26 @@ class Ollama(Model):
         cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
         return cleaned_dict

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by Ollama.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         _message: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
         }

         if message.role == "assistant" and message.tool_calls is not None:
@@ -228,6 +235,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat request to the Ollama API.
@@ -241,7 +249,7 @@ class Ollama(Model):

         provider_response = self.get_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **request_kwargs,
         )  # type: ignore

@@ -258,6 +266,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat request to the Ollama API.
@@ -271,7 +280,7 @@ class Ollama(Model):

         provider_response = await self.get_async_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **request_kwargs,
         )  # type: ignore

@@ -288,6 +297,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Sends a streaming chat request to the Ollama API.
@@ -299,7 +309,7 @@ class Ollama(Model):

         for chunk in self.get_client().chat(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(tools=tools),
         ):
@@ -315,6 +325,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Ollama API.
@@ -326,7 +337,7 @@ class Ollama(Model):

         async for chunk in await self.get_async_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(tools=tools),
         ):
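Taken together, every invoke, ainvoke, invoke_stream, and ainvoke_stream variant shown above gains the same keyword with a False default, so existing call sites keep their pre-2.3.3 payloads and compression is strictly opt-in. A closing, self-contained illustration of what the flag buys at the payload level (plain dicts rather than agno objects):

# Two messages: a short user prompt and a very large tool result with a compressed summary.
raw_tool_output = "x" * 20_000
messages = [
    {"role": "user", "content": "Why did the job fail?", "compressed_content": None},
    {"role": "tool", "content": raw_tool_output, "compressed_content": "traceback points at step 42"},
]


def build_payload(compress_tool_results: bool) -> list:
    payload = []
    for m in messages:
        content = m["content"]
        # Only tool results are swapped; user/assistant/system messages are never touched.
        if m["role"] == "tool" and compress_tool_results and m["compressed_content"]:
            content = m["compressed_content"]
        payload.append({"role": m["role"], "content": content})
    return payload


assert len(str(build_payload(False))) > len(str(build_payload(True)))  # compressed payload is far smaller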