agno 2.3.2__py3-none-any.whl → 2.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +513 -185
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +66 -52
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +29 -27
- agno/db/sqlite/sqlite.py +30 -26
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +217 -4
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +18 -0
- agno/models/anthropic/claude.py +87 -81
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +25 -9
- agno/models/cerebras/cerebras_openai.py +22 -2
- agno/models/cohere/chat.py +18 -6
- agno/models/cometapi/cometapi.py +19 -1
- agno/models/deepinfra/deepinfra.py +19 -1
- agno/models/fireworks/fireworks.py +19 -1
- agno/models/google/gemini.py +583 -21
- agno/models/groq/groq.py +23 -6
- agno/models/huggingface/huggingface.py +22 -7
- agno/models/ibm/watsonx.py +21 -7
- agno/models/internlm/internlm.py +19 -1
- agno/models/langdb/langdb.py +10 -0
- agno/models/litellm/chat.py +17 -7
- agno/models/litellm/litellm_openai.py +19 -1
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +25 -5
- agno/models/meta/llama_openai.py +18 -0
- agno/models/mistral/mistral.py +13 -5
- agno/models/nvidia/nvidia.py +19 -1
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +22 -7
- agno/models/openai/responses.py +28 -10
- agno/models/openrouter/openrouter.py +20 -0
- agno/models/perplexity/perplexity.py +17 -0
- agno/models/requesty/requesty.py +18 -0
- agno/models/sambanova/sambanova.py +19 -1
- agno/models/siliconflow/siliconflow.py +19 -1
- agno/models/together/together.py +19 -1
- agno/models/vercel/v0.py +19 -1
- agno/models/vertexai/claude.py +99 -5
- agno/models/xai/xai.py +18 -0
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -0
- agno/os/routers/memory/memory.py +143 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +33 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +566 -219
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py
CHANGED
@@ -74,7 +74,11 @@ class Groq(Model):
         if not self.api_key:
             self.api_key = getenv("GROQ_API_KEY")
             if not self.api_key:
-
+                raise ModelProviderError(
+                    message="GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )

         # Define base client params
         base_params = {
@@ -221,19 +225,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.

         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -276,6 +289,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -287,7 +301,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -316,6 +330,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +342,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -356,6 +371,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -368,7 +384,7 @@ class Groq(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -396,6 +412,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +426,7 @@ class Groq(Model):

         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
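Note: the Groq changes above, and the matching HuggingFace, LiteLLM, and Llama changes below, all apply the same pattern: only tool-role messages are swapped to their compressed content, and only when compression is active. A minimal standalone sketch of that selection logic, using a hypothetical ToolMessage stand-in rather than agno's own classes:

# Minimal sketch of the tool-result selection pattern shared by these provider diffs.
# ToolMessage is a hypothetical stand-in; agno's real Message class (agno/models/message.py)
# exposes the same get_content() shown later in this diff.
from dataclasses import dataclass
from typing import Optional


@dataclass
class ToolMessage:
    role: str
    content: str
    compressed_content: Optional[str] = None

    def get_content(self, use_compressed_content: bool = False) -> str:
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content


def select_content(message: ToolMessage, compress_tool_results: bool = False) -> str:
    # Only tool results are ever compressed; other roles pass through unchanged.
    if message.role == "tool":
        return message.get_content(use_compressed_content=compress_tool_results)
    return message.content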
agno/models/huggingface/huggingface.py
CHANGED

@@ -73,7 +73,11 @@ class HuggingFace(Model):
     def get_client_params(self) -> Dict[str, Any]:
         self.api_key = self.api_key or getenv("HF_TOKEN")
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="HF_TOKEN not set. Please set the HF_TOKEN environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )

         _client_params: Dict[str, Any] = {}
         if self.api_key is not None:
@@ -191,19 +195,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +247,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +259,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +281,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +293,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +315,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +328,7 @@ class HuggingFace(Model):

         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +354,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +366,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
agno/models/ibm/watsonx.py
CHANGED
@@ -59,7 +59,11 @@ class WatsonX(Model):
         # Fetch API key and project ID from env if not already set
         self.api_key = self.api_key or getenv("IBM_WATSONX_API_KEY")
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="IBM_WATSONX_API_KEY not set. Please set the IBM_WATSONX_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )

         self.project_id = self.project_id or getenv("IBM_WATSONX_PROJECT_ID")
         if not self.project_id:
@@ -129,12 +133,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +156,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")

-
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict

     def invoke(
         self,
@@ -161,6 +171,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +182,7 @@ class WatsonX(Model):

         client = self.get_client()

-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +207,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +217,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +243,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +278,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +288,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         # Get parameters for chat
         request_params = self.get_request_params(
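Note: WatsonX takes a slightly different route from Groq and HuggingFace above: it serializes the whole message via message.to_dict() first and only then patches the "content" key for tool messages when compression is on. A hedged, self-contained sketch of that serialize-then-patch order (plain dicts, not agno's classes):

# Sketch of the serialize-then-patch variant used in the WatsonX diff above.
# message_dict stands in for Message.to_dict(); compressed_content for Message.compressed_content.
from typing import Any, Dict, Optional


def format_watsonx_style(
    message_dict: Dict[str, Any],
    compressed_content: Optional[str],
    compress_tool_results: bool = False,
) -> Dict[str, Any]:
    if message_dict.get("role") == "tool" and compress_tool_results and compressed_content is not None:
        message_dict["content"] = compressed_content
    return message_dict


print(format_watsonx_style({"role": "tool", "content": "full output"}, "short summary", compress_tool_results=True))
# -> {'role': 'tool', 'content': 'short summary'}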
agno/models/internlm/internlm.py
CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional

+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike


@@ -24,3 +25,20 @@ class InternLM(OpenAILike):

     api_key: Optional[str] = field(default_factory=lambda: getenv("INTERNLM_API_KEY"))
     base_url: Optional[str] = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for INTERNLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("INTERNLM_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="INTERNLM_API_KEY not set. Please set the INTERNLM_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
agno/models/langdb/langdb.py
CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional

+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike


@@ -32,6 +33,15 @@ class LangDB(OpenAILike):
     default_headers: Optional[dict] = None

     def _get_client_params(self) -> Dict[str, Any]:
+        if not self.api_key:
+            self.api_key = getenv("LANGDB_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="LANGDB_API_KEY not set. Please set the LANGDB_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+
         if not self.project_id:
             raise ValueError("LANGDB_PROJECT_ID not set in the environment")
agno/models/litellm/chat.py
CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client

-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
         for m in messages:
-
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}

             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
             if isinstance(msg["content"], str):
                 content_list = [{"type": "text", "text": msg["content"]}]
             else:
-                content_list = msg["content"]
+                content_list = msg["content"] if isinstance(msg["content"], list) else []
             for file in m.files:
                 file_part = _format_file_for_message(file)
                 if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}

@@ -236,10 +244,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
agno/models/litellm/litellm_openai.py
CHANGED

@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional

+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike


@@ -23,3 +24,20 @@ class LiteLLMOpenAI(OpenAILike):

     api_key: Optional[str] = field(default_factory=lambda: getenv("LITELLM_API_KEY"))
     base_url: str = "http://0.0.0.0:4000"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for LITELLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("LITELLM_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="LITELLM_API_KEY not set. Please set the LITELLM_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
agno/models/message.py
CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
     role: str
     # The contents of the message.
     content: Optional[Union[List[Any], str]] = None
+    # Compressed content of the message
+    compressed_content: Optional[str] = None
+
     # An optional name for the participant.
     # Provides the model information to differentiate between participants of the same role.
     name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
             return json.dumps(self.content)
         return ""

+    def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+        """Return tool result content to send to API"""
+        if use_compressed_content and self.compressed_content is not None:
+            return self.compressed_content
+        return self.content
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
             "content": self.content,
             "reasoning_content": self.reasoning_content,
             "from_history": self.from_history,
+            "compressed_content": self.compressed_content,
             "stop_after_tool_call": self.stop_after_tool_call,
             "role": self.role,
             "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
             "created_at": self.created_at,
         }

-    def log(self, metrics: bool = True, level: Optional[str] = None):
+    def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
         """Log the message to the console

         Args:
             metrics (bool): Whether to log the metrics.
             level (str): The level to log the message at. One of debug, info, warning, or error.
                 Defaults to debug.
+            use_compressed_content (bool): Whether to use compressed content.
         """
         _logger = log_debug
         if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
         if self.reasoning_content:
             _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
         if self.content:
-            if
-            _logger(self.
-
-
+            if use_compressed_content and self.compressed_content:
+                _logger("Compressed content:\n" + self.compressed_content)
+            else:
+                if isinstance(self.content, str) or isinstance(self.content, list):
+                    _logger(self.content)
+                elif isinstance(self.content, dict):
+                    _logger(json.dumps(self.content, indent=2))
         if self.tool_calls:
             tool_calls_list = ["Tool Calls:"]
             for tool_call in self.tool_calls:
agno/models/meta/llama.py
CHANGED
@@ -74,7 +74,11 @@ class Llama(Model):
         if not self.api_key:
             self.api_key = getenv("LLAMA_API_KEY")
             if not self.api_key:
-
+                raise ModelProviderError(
+                    message="LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )

         # Define base client params
         base_params = {
@@ -217,6 +221,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Llama API.
@@ -225,7 +230,10 @@ class Llama(Model):

         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -242,6 +250,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +262,10 @@ class Llama(Model):

         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -270,6 +282,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Llama API.
@@ -282,7 +295,10 @@ class Llama(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format),
         ):
@@ -302,6 +318,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +331,10 @@ class Llama(Model):
         try:
             async for chunk in await self.get_async_client().chat.completions.create(
                 model=self.id,
-                messages=[
+                messages=[
+                    format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                    for m in messages
+                ],
                 stream=True,
                 **self.get_request_params(tools=tools, response_format=response_format),
             ):