agno 2.3.7__py3-none-any.whl → 2.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. agno/agent/agent.py +391 -335
  2. agno/db/mongo/async_mongo.py +0 -24
  3. agno/db/mongo/mongo.py +0 -16
  4. agno/db/mysql/__init__.py +2 -1
  5. agno/db/mysql/async_mysql.py +2888 -0
  6. agno/db/mysql/mysql.py +17 -27
  7. agno/db/mysql/utils.py +139 -6
  8. agno/db/postgres/async_postgres.py +10 -26
  9. agno/db/postgres/postgres.py +7 -25
  10. agno/db/redis/redis.py +0 -4
  11. agno/db/schemas/evals.py +1 -0
  12. agno/db/singlestore/singlestore.py +5 -12
  13. agno/db/sqlite/async_sqlite.py +2 -26
  14. agno/db/sqlite/sqlite.py +0 -20
  15. agno/eval/__init__.py +10 -0
  16. agno/eval/agent_as_judge.py +860 -0
  17. agno/eval/base.py +29 -0
  18. agno/eval/utils.py +2 -1
  19. agno/exceptions.py +7 -0
  20. agno/knowledge/embedder/openai.py +8 -8
  21. agno/knowledge/knowledge.py +1142 -176
  22. agno/media.py +22 -6
  23. agno/models/aws/claude.py +8 -7
  24. agno/models/base.py +160 -11
  25. agno/models/deepseek/deepseek.py +67 -0
  26. agno/models/google/gemini.py +65 -11
  27. agno/models/google/utils.py +22 -0
  28. agno/models/message.py +2 -0
  29. agno/models/openai/chat.py +4 -0
  30. agno/models/openai/responses.py +3 -2
  31. agno/os/app.py +64 -74
  32. agno/os/interfaces/a2a/router.py +3 -4
  33. agno/os/interfaces/a2a/utils.py +1 -1
  34. agno/os/interfaces/agui/router.py +2 -0
  35. agno/os/middleware/jwt.py +8 -6
  36. agno/os/router.py +3 -1607
  37. agno/os/routers/agents/__init__.py +3 -0
  38. agno/os/routers/agents/router.py +581 -0
  39. agno/os/routers/agents/schema.py +261 -0
  40. agno/os/routers/evals/evals.py +26 -6
  41. agno/os/routers/evals/schemas.py +34 -2
  42. agno/os/routers/evals/utils.py +101 -20
  43. agno/os/routers/knowledge/knowledge.py +1 -1
  44. agno/os/routers/teams/__init__.py +3 -0
  45. agno/os/routers/teams/router.py +496 -0
  46. agno/os/routers/teams/schema.py +257 -0
  47. agno/os/routers/workflows/__init__.py +3 -0
  48. agno/os/routers/workflows/router.py +545 -0
  49. agno/os/routers/workflows/schema.py +75 -0
  50. agno/os/schema.py +1 -559
  51. agno/os/utils.py +139 -2
  52. agno/team/team.py +159 -100
  53. agno/tools/file_generation.py +12 -6
  54. agno/tools/firecrawl.py +15 -7
  55. agno/tools/workflow.py +8 -1
  56. agno/utils/hooks.py +64 -5
  57. agno/utils/http.py +2 -2
  58. agno/utils/media.py +11 -1
  59. agno/utils/print_response/agent.py +8 -0
  60. agno/utils/print_response/team.py +8 -0
  61. agno/vectordb/pgvector/pgvector.py +88 -51
  62. agno/workflow/parallel.py +11 -5
  63. agno/workflow/step.py +17 -5
  64. agno/workflow/types.py +38 -2
  65. agno/workflow/workflow.py +12 -4
  66. {agno-2.3.7.dist-info → agno-2.3.9.dist-info}/METADATA +8 -3
  67. {agno-2.3.7.dist-info → agno-2.3.9.dist-info}/RECORD +70 -58
  68. agno/tools/memori.py +0 -339
  69. {agno-2.3.7.dist-info → agno-2.3.9.dist-info}/WHEEL +0 -0
  70. {agno-2.3.7.dist-info → agno-2.3.9.dist-info}/licenses/LICENSE +0 -0
  71. {agno-2.3.7.dist-info → agno-2.3.9.dist-info}/top_level.txt +0 -0
agno/media.py CHANGED
@@ -4,6 +4,8 @@ from uuid import uuid4
4
4
 
5
5
  from pydantic import BaseModel, field_validator, model_validator
6
6
 
7
+ from agno.utils.log import log_error
8
+
7
9
 
8
10
  class Image(BaseModel):
9
11
  """Unified Image class for all use cases (input, output, artifacts)"""
@@ -395,10 +397,20 @@ class File(BaseModel):
395
397
  name: Optional[str] = None,
396
398
  format: Optional[str] = None,
397
399
  ) -> "File":
398
- """Create File from base64 encoded content"""
400
+ """Create File from base64 encoded content or plain text.
401
+
402
+ Handles both base64-encoded binary content and plain text content
403
+ (which is stored as UTF-8 strings for text/* MIME types).
404
+ """
399
405
  import base64
400
406
 
401
- content_bytes = base64.b64decode(base64_content)
407
+ try:
408
+ content_bytes = base64.b64decode(base64_content)
409
+ except Exception:
410
+ # If not valid base64, it might be plain text content (text/csv, text/plain, etc.)
411
+ # which is stored as UTF-8 strings, not base64
412
+ content_bytes = base64_content.encode("utf-8")
413
+
402
414
  return cls(
403
415
  content=content_bytes,
404
416
  id=id,
@@ -413,10 +425,14 @@ class File(BaseModel):
413
425
  import httpx
414
426
 
415
427
  if self.url:
416
- response = httpx.get(self.url)
417
- content = response.content
418
- mime_type = response.headers.get("Content-Type", "").split(";")[0]
419
- return content, mime_type
428
+ try:
429
+ response = httpx.get(self.url)
430
+ content = response.content
431
+ mime_type = response.headers.get("Content-Type", "").split(";")[0]
432
+ return content, mime_type
433
+ except Exception:
434
+ log_error(f"Failed to download file from {self.url}")
435
+ return None
420
436
  else:
421
437
  return None
422
438
 
agno/models/aws/claude.py CHANGED
@@ -7,7 +7,7 @@ from pydantic import BaseModel
7
7
 
8
8
  from agno.models.anthropic import Claude as AnthropicClaude
9
9
  from agno.utils.http import get_default_async_client, get_default_sync_client
10
- from agno.utils.log import log_debug, log_error, log_warning
10
+ from agno.utils.log import log_debug, log_warning
11
11
  from agno.utils.models.claude import format_tools_for_model
12
12
 
13
13
  try:
@@ -70,8 +70,8 @@ class Claude(AnthropicClaude):
70
70
  if self.aws_region:
71
71
  client_params["aws_region"] = self.aws_region
72
72
  else:
73
- self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
74
- self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
73
+ self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY_ID") or getenv("AWS_ACCESS_KEY")
74
+ self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_ACCESS_KEY") or getenv("AWS_SECRET_KEY")
75
75
  self.aws_region = self.aws_region or getenv("AWS_REGION")
76
76
 
77
77
  client_params = {
@@ -79,10 +79,11 @@ class Claude(AnthropicClaude):
79
79
  "aws_access_key": self.aws_access_key,
80
80
  "aws_region": self.aws_region,
81
81
  }
82
- if not (self.aws_access_key or (self.aws_access_key and self.aws_secret_key)):
83
- log_error(
84
- "AWS credentials not found. Please either set the AWS_BEDROCK_API_KEY or AWS_ACCESS_KEY and AWS_SECRET_KEY environment variables."
85
- )
82
+
83
+ if not (self.api_key or (self.aws_access_key and self.aws_secret_key)):
84
+ log_warning(
85
+ "AWS credentials not found. Please set AWS_BEDROCK_API_KEY or AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide a boto3 session."
86
+ )
86
87
 
87
88
  if self.timeout is not None:
88
89
  client_params["timeout"] = self.timeout
agno/models/base.py CHANGED
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
5
5
  from dataclasses import dataclass, field
6
6
  from hashlib import md5
7
7
  from pathlib import Path
8
- from time import time
8
+ from time import sleep, time
9
9
  from types import AsyncGeneratorType, GeneratorType
10
10
  from typing import (
11
11
  Any,
@@ -24,7 +24,7 @@ from uuid import uuid4
24
24
 
25
25
  from pydantic import BaseModel
26
26
 
27
- from agno.exceptions import AgentRunException
27
+ from agno.exceptions import AgentRunException, ModelProviderError, RetryableModelProviderError
28
28
  from agno.media import Audio, File, Image, Video
29
29
  from agno.models.message import Citations, Message
30
30
  from agno.models.metrics import Metrics
@@ -146,15 +146,164 @@ class Model(ABC):
146
146
  cache_ttl: Optional[int] = None
147
147
  cache_dir: Optional[str] = None
148
148
 
149
+ # Retry configuration for model provider errors
150
+ # Number of retries to attempt when a ModelProviderError occurs
151
+ retries: int = 0
152
+ # Delay between retries (in seconds)
153
+ delay_between_retries: int = 1
154
+ # Exponential backoff: if True, the delay between retries is doubled each time
155
+ exponential_backoff: bool = False
156
+ # Enable retrying a model invocation once with a guidance message.
157
+ # This is useful for known errors avoidable with extra instructions.
158
+ retry_with_guidance: bool = True
159
+
149
160
  def __post_init__(self):
150
161
  if self.provider is None and self.name is not None:
151
162
  self.provider = f"{self.name} ({self.id})"
152
163
 
164
+ def _get_retry_delay(self, attempt: int) -> float:
165
+ """Calculate the delay before the next retry attempt."""
166
+ if self.exponential_backoff:
167
+ return self.delay_between_retries * (2**attempt)
168
+ return self.delay_between_retries
169
+
170
+ def _invoke_with_retry(self, **kwargs) -> ModelResponse:
171
+ """
172
+ Invoke the model with retry logic for ModelProviderError.
173
+
174
+ This method wraps the invoke() call and retries on ModelProviderError
175
+ with optional exponential backoff.
176
+ """
177
+ last_exception: Optional[ModelProviderError] = None
178
+
179
+ for attempt in range(self.retries + 1):
180
+ try:
181
+ return self.invoke(**kwargs)
182
+ except ModelProviderError as e:
183
+ last_exception = e
184
+ if attempt < self.retries:
185
+ delay = self._get_retry_delay(attempt)
186
+ log_warning(
187
+ f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
188
+ )
189
+ sleep(delay)
190
+ else:
191
+ log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
192
+ except RetryableModelProviderError as e:
193
+ kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
194
+ return self._invoke_with_retry(**kwargs, retrying_with_guidance=True)
195
+
196
+ # If we've exhausted all retries, raise the last exception
197
+ raise last_exception # type: ignore
198
+
199
+ async def _ainvoke_with_retry(self, **kwargs) -> ModelResponse:
200
+ """
201
+ Asynchronously invoke the model with retry logic for ModelProviderError.
202
+
203
+ This method wraps the ainvoke() call and retries on ModelProviderError
204
+ with optional exponential backoff.
205
+ """
206
+ last_exception: Optional[ModelProviderError] = None
207
+
208
+ for attempt in range(self.retries + 1):
209
+ try:
210
+ return await self.ainvoke(**kwargs)
211
+ except ModelProviderError as e:
212
+ last_exception = e
213
+ if attempt < self.retries:
214
+ delay = self._get_retry_delay(attempt)
215
+ log_warning(
216
+ f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
217
+ )
218
+ await asyncio.sleep(delay)
219
+ else:
220
+ log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
221
+ except RetryableModelProviderError as e:
222
+ kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
223
+ return await self._ainvoke_with_retry(**kwargs, retrying_with_guidance=True)
224
+
225
+ # If we've exhausted all retries, raise the last exception
226
+ raise last_exception # type: ignore
227
+
228
+ def _invoke_stream_with_retry(self, **kwargs) -> Iterator[ModelResponse]:
229
+ """
230
+ Invoke the model stream with retry logic for ModelProviderError.
231
+
232
+ This method wraps the invoke_stream() call and retries on ModelProviderError
233
+ with optional exponential backoff. Note that retries restart the entire stream.
234
+ """
235
+ last_exception: Optional[ModelProviderError] = None
236
+
237
+ for attempt in range(self.retries + 1):
238
+ try:
239
+ yield from self.invoke_stream(**kwargs)
240
+ return # Success, exit the retry loop
241
+ except ModelProviderError as e:
242
+ last_exception = e
243
+ if attempt < self.retries:
244
+ delay = self._get_retry_delay(attempt)
245
+ log_warning(
246
+ f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
247
+ f"Retrying in {delay}s..."
248
+ )
249
+ sleep(delay)
250
+ else:
251
+ log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
252
+ except RetryableModelProviderError as e:
253
+ kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
254
+ yield from self._invoke_stream_with_retry(**kwargs, retrying_with_guidance=True)
255
+ return # Success, exit after regeneration
256
+
257
+ # If we've exhausted all retries, raise the last exception
258
+ raise last_exception # type: ignore
259
+
260
+ async def _ainvoke_stream_with_retry(self, **kwargs) -> AsyncIterator[ModelResponse]:
261
+ """
262
+ Asynchronously invoke the model stream with retry logic for ModelProviderError.
263
+
264
+ This method wraps the ainvoke_stream() call and retries on ModelProviderError
265
+ with optional exponential backoff. Note that retries restart the entire stream.
266
+ """
267
+ last_exception: Optional[ModelProviderError] = None
268
+
269
+ for attempt in range(self.retries + 1):
270
+ try:
271
+ async for response in self.ainvoke_stream(**kwargs):
272
+ yield response
273
+ return # Success, exit the retry loop
274
+ except ModelProviderError as e:
275
+ last_exception = e
276
+ if attempt < self.retries:
277
+ delay = self._get_retry_delay(attempt)
278
+ log_warning(
279
+ f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
280
+ f"Retrying in {delay}s..."
281
+ )
282
+ await asyncio.sleep(delay)
283
+ else:
284
+ log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
285
+ except RetryableModelProviderError as e:
286
+ kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
287
+ async for response in self._ainvoke_stream_with_retry(**kwargs, retrying_with_guidance=True):
288
+ yield response
289
+ return # Success, exit after regeneration
290
+
291
+ # If we've exhausted all retries, raise the last exception
292
+ raise last_exception # type: ignore
293
+
153
294
  def to_dict(self) -> Dict[str, Any]:
154
295
  fields = {"name", "id", "provider"}
155
296
  _dict = {field: getattr(self, field) for field in fields if getattr(self, field) is not None}
156
297
  return _dict
157
298
 
299
+ def _remove_temporarys(self, messages: List[Message]) -> None:
300
+ """Remove temporal messages from the given list.
301
+
302
+ Args:
303
+ messages: The list of messages to filter (modified in place).
304
+ """
305
+ messages[:] = [m for m in messages if not m.temporary]
306
+
158
307
  def get_provider(self) -> str:
159
308
  return self.provider or self.name or self.__class__.__name__
160
309
 
@@ -734,8 +883,8 @@ class Model(ABC):
734
883
  Returns:
735
884
  Tuple[Message, bool]: (assistant_message, should_continue)
736
885
  """
737
- # Generate response
738
- provider_response = self.invoke(
886
+ # Generate response with retry logic for ModelProviderError
887
+ provider_response = self._invoke_with_retry(
739
888
  assistant_message=assistant_message,
740
889
  messages=messages,
741
890
  response_format=response_format,
@@ -791,8 +940,8 @@ class Model(ABC):
791
940
  Returns:
792
941
  Tuple[Message, bool]: (assistant_message, should_continue)
793
942
  """
794
- # Generate response
795
- provider_response = await self.ainvoke(
943
+ # Generate response with retry logic for ModelProviderError
944
+ provider_response = await self._ainvoke_with_retry(
796
945
  messages=messages,
797
946
  response_format=response_format,
798
947
  tools=tools,
@@ -913,10 +1062,10 @@ class Model(ABC):
913
1062
  compress_tool_results: bool = False,
914
1063
  ) -> Iterator[ModelResponse]:
915
1064
  """
916
- Process a streaming response from the model.
1065
+ Process a streaming response from the model with retry logic for ModelProviderError.
917
1066
  """
918
1067
 
919
- for response_delta in self.invoke_stream(
1068
+ for response_delta in self._invoke_stream_with_retry(
920
1069
  messages=messages,
921
1070
  assistant_message=assistant_message,
922
1071
  response_format=response_format,
@@ -1132,9 +1281,9 @@ class Model(ABC):
1132
1281
  compress_tool_results: bool = False,
1133
1282
  ) -> AsyncIterator[ModelResponse]:
1134
1283
  """
1135
- Process a streaming response from the model.
1284
+ Process a streaming response from the model with retry logic for ModelProviderError.
1136
1285
  """
1137
- async for response_delta in self.ainvoke_stream(
1286
+ async for response_delta in self._ainvoke_stream_with_retry(
1138
1287
  messages=messages,
1139
1288
  assistant_message=assistant_message,
1140
1289
  response_format=response_format,
@@ -1142,7 +1291,7 @@ class Model(ABC):
1142
1291
  tool_choice=tool_choice or self._tool_choice,
1143
1292
  run_response=run_response,
1144
1293
  compress_tool_results=compress_tool_results,
1145
- ): # type: ignore
1294
+ ):
1146
1295
  for model_response_delta in self._populate_stream_data(
1147
1296
  stream_data=stream_data,
1148
1297
  model_response_delta=response_delta,
@@ -3,7 +3,10 @@ from os import getenv
3
3
  from typing import Any, Dict, Optional
4
4
 
5
5
  from agno.exceptions import ModelAuthenticationError
6
+ from agno.models.message import Message
6
7
  from agno.models.openai.like import OpenAILike
8
+ from agno.utils.log import log_warning
9
+ from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
7
10
 
8
11
 
9
12
  @dataclass
@@ -58,3 +61,67 @@ class DeepSeek(OpenAILike):
58
61
  if self.client_params:
59
62
  client_params.update(self.client_params)
60
63
  return client_params
64
+
65
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
66
+ """
67
+ Format a message into the format expected by OpenAI.
68
+
69
+ Args:
70
+ message (Message): The message to format.
71
+ compress_tool_results: Whether to compress tool results.
72
+
73
+ Returns:
74
+ Dict[str, Any]: The formatted message.
75
+ """
76
+ tool_result = message.get_content(use_compressed_content=compress_tool_results)
77
+
78
+ message_dict: Dict[str, Any] = {
79
+ "role": self.role_map[message.role] if self.role_map else self.default_role_map[message.role],
80
+ "content": tool_result,
81
+ "name": message.name,
82
+ "tool_call_id": message.tool_call_id,
83
+ "tool_calls": message.tool_calls,
84
+ "reasoning_content": message.reasoning_content,
85
+ }
86
+ message_dict = {k: v for k, v in message_dict.items() if v is not None}
87
+
88
+ # Ignore non-string message content
89
+ # because we assume that the images/audio are already added to the message
90
+ if (message.images is not None and len(message.images) > 0) or (
91
+ message.audio is not None and len(message.audio) > 0
92
+ ):
93
+ # Ignore non-string message content
94
+ # because we assume that the images/audio are already added to the message
95
+ if isinstance(message.content, str):
96
+ message_dict["content"] = [{"type": "text", "text": message.content}]
97
+ if message.images is not None:
98
+ message_dict["content"].extend(images_to_message(images=message.images))
99
+
100
+ if message.audio is not None:
101
+ message_dict["content"].extend(audio_to_message(audio=message.audio))
102
+
103
+ if message.audio_output is not None:
104
+ message_dict["content"] = ""
105
+ message_dict["audio"] = {"id": message.audio_output.id}
106
+
107
+ if message.videos is not None and len(message.videos) > 0:
108
+ log_warning("Video input is currently unsupported.")
109
+
110
+ if message.files is not None:
111
+ # Ensure content is a list of parts
112
+ content = message_dict.get("content")
113
+ if isinstance(content, str): # wrap existing text
114
+ text = content
115
+ message_dict["content"] = [{"type": "text", "text": text}]
116
+ elif content is None:
117
+ message_dict["content"] = []
118
+ # Insert each file part before text parts
119
+ for file in message.files:
120
+ file_part = _format_file_for_message(file)
121
+ if file_part:
122
+ message_dict["content"].insert(0, file_part)
123
+
124
+ # Manually add the content field even if it is None
125
+ if message.content is None:
126
+ message_dict["content"] = ""
127
+ return message_dict
@@ -13,7 +13,8 @@ from pydantic import BaseModel
13
13
 
14
14
  from agno.exceptions import ModelProviderError
15
15
  from agno.media import Audio, File, Image, Video
16
- from agno.models.base import Model
16
+ from agno.models.base import Model, RetryableModelProviderError
17
+ from agno.models.google.utils import MALFORMED_FUNCTION_CALL_GUIDANCE, GeminiFinishReason
17
18
  from agno.models.message import Citations, Message, UrlCitation
18
19
  from agno.models.metrics import Metrics
19
20
  from agno.models.response import ModelResponse
@@ -317,6 +318,7 @@ class Gemini(Model):
317
318
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
318
319
  run_response: Optional[RunOutput] = None,
319
320
  compress_tool_results: bool = False,
321
+ retrying_with_guidance: bool = False,
320
322
  ) -> ModelResponse:
321
323
  """
322
324
  Invokes the model with a list of messages and returns the response.
@@ -337,7 +339,13 @@ class Gemini(Model):
337
339
  )
338
340
  assistant_message.metrics.stop_timer()
339
341
 
340
- model_response = self._parse_provider_response(provider_response, response_format=response_format)
342
+ model_response = self._parse_provider_response(
343
+ provider_response, response_format=response_format, retrying_with_guidance=retrying_with_guidance
344
+ )
345
+
346
+ # If we were retrying the invoke with guidance, remove the guidance message
347
+ if retrying_with_guidance is True:
348
+ self._remove_temporarys(messages)
341
349
 
342
350
  return model_response
343
351
 
@@ -350,6 +358,8 @@ class Gemini(Model):
350
358
  model_name=self.name,
351
359
  model_id=self.id,
352
360
  ) from e
361
+ except RetryableModelProviderError:
362
+ raise
353
363
  except Exception as e:
354
364
  log_error(f"Unknown error from Gemini API: {e}")
355
365
  raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
@@ -363,6 +373,7 @@ class Gemini(Model):
363
373
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
364
374
  run_response: Optional[RunOutput] = None,
365
375
  compress_tool_results: bool = False,
376
+ retrying_with_guidance: bool = False,
366
377
  ) -> Iterator[ModelResponse]:
367
378
  """
368
379
  Invokes the model with a list of messages and returns the response as a stream.
@@ -382,7 +393,11 @@ class Gemini(Model):
382
393
  contents=formatted_messages,
383
394
  **request_kwargs,
384
395
  ):
385
- yield self._parse_provider_response_delta(response)
396
+ yield self._parse_provider_response_delta(response, retrying_with_guidance=retrying_with_guidance)
397
+
398
+ # If we were retrying the invoke with guidance, remove the guidance message
399
+ if retrying_with_guidance is True:
400
+ self._remove_temporarys(messages)
386
401
 
387
402
  assistant_message.metrics.stop_timer()
388
403
 
@@ -394,6 +409,8 @@ class Gemini(Model):
394
409
  model_name=self.name,
395
410
  model_id=self.id,
396
411
  ) from e
412
+ except RetryableModelProviderError:
413
+ raise
397
414
  except Exception as e:
398
415
  log_error(f"Unknown error from Gemini API: {e}")
399
416
  raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
@@ -407,6 +424,7 @@ class Gemini(Model):
407
424
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
408
425
  run_response: Optional[RunOutput] = None,
409
426
  compress_tool_results: bool = False,
427
+ retrying_with_guidance: bool = False,
410
428
  ) -> ModelResponse:
411
429
  """
412
430
  Invokes the model with a list of messages and returns the response.
@@ -429,7 +447,13 @@ class Gemini(Model):
429
447
  )
430
448
  assistant_message.metrics.stop_timer()
431
449
 
432
- model_response = self._parse_provider_response(provider_response, response_format=response_format)
450
+ model_response = self._parse_provider_response(
451
+ provider_response, response_format=response_format, retrying_with_guidance=retrying_with_guidance
452
+ )
453
+
454
+ # If we were retrying the invoke with guidance, remove the guidance message
455
+ if retrying_with_guidance is True:
456
+ self._remove_temporarys(messages)
433
457
 
434
458
  return model_response
435
459
 
@@ -441,6 +465,8 @@ class Gemini(Model):
441
465
  model_name=self.name,
442
466
  model_id=self.id,
443
467
  ) from e
468
+ except RetryableModelProviderError:
469
+ raise
444
470
  except Exception as e:
445
471
  log_error(f"Unknown error from Gemini API: {e}")
446
472
  raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
@@ -454,6 +480,7 @@ class Gemini(Model):
454
480
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
455
481
  run_response: Optional[RunOutput] = None,
456
482
  compress_tool_results: bool = False,
483
+ retrying_with_guidance: bool = False,
457
484
  ) -> AsyncIterator[ModelResponse]:
458
485
  """
459
486
  Invokes the model with a list of messages and returns the response as a stream.
@@ -476,7 +503,11 @@ class Gemini(Model):
476
503
  **request_kwargs,
477
504
  )
478
505
  async for chunk in async_stream:
479
- yield self._parse_provider_response_delta(chunk)
506
+ yield self._parse_provider_response_delta(chunk, retrying_with_guidance=retrying_with_guidance)
507
+
508
+ # If we were retrying the invoke with guidance, remove the guidance message
509
+ if retrying_with_guidance is True:
510
+ self._remove_temporarys(messages)
480
511
 
481
512
  assistant_message.metrics.stop_timer()
482
513
 
@@ -488,6 +519,8 @@ class Gemini(Model):
488
519
  model_name=self.name,
489
520
  model_id=self.id,
490
521
  ) from e
522
+ except RetryableModelProviderError:
523
+ raise
491
524
  except Exception as e:
492
525
  log_error(f"Unknown error from Gemini API: {e}")
493
526
  raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
@@ -863,10 +896,10 @@ class Gemini(Model):
863
896
 
864
897
  def _parse_provider_response(self, response: GenerateContentResponse, **kwargs) -> ModelResponse:
865
898
  """
866
- Parse the OpenAI response into a ModelResponse.
899
+ Parse the Gemini response into a ModelResponse.
867
900
 
868
901
  Args:
869
- response: Raw response from OpenAI
902
+ response: Raw response from Gemini
870
903
 
871
904
  Returns:
872
905
  ModelResponse: Parsed response data
@@ -875,8 +908,20 @@ class Gemini(Model):
875
908
 
876
909
  # Get response message
877
910
  response_message = Content(role="model", parts=[])
878
- if response.candidates and response.candidates[0].content:
879
- response_message = response.candidates[0].content
911
+ if response.candidates and len(response.candidates) > 0:
912
+ candidate = response.candidates[0]
913
+
914
+ # Raise if the request failed because of a malformed function call
915
+ if hasattr(candidate, "finish_reason") and candidate.finish_reason:
916
+ if candidate.finish_reason == GeminiFinishReason.MALFORMED_FUNCTION_CALL.value:
917
+ # We only want to raise errors that trigger regeneration attempts once
918
+ if kwargs.get("retrying_with_guidance") is True:
919
+ pass
920
+ if self.retry_with_guidance:
921
+ raise RetryableModelProviderError(retry_guidance_message=MALFORMED_FUNCTION_CALL_GUIDANCE)
922
+
923
+ if candidate.content:
924
+ response_message = candidate.content
880
925
 
881
926
  # Add role
882
927
  if response_message.role is not None:
@@ -1019,11 +1064,20 @@ class Gemini(Model):
1019
1064
 
1020
1065
  return model_response
1021
1066
 
1022
- def _parse_provider_response_delta(self, response_delta: GenerateContentResponse) -> ModelResponse:
1067
+ def _parse_provider_response_delta(self, response_delta: GenerateContentResponse, **kwargs) -> ModelResponse:
1023
1068
  model_response = ModelResponse()
1024
1069
 
1025
1070
  if response_delta.candidates and len(response_delta.candidates) > 0:
1026
- candidate_content = response_delta.candidates[0].content
1071
+ candidate = response_delta.candidates[0]
1072
+ candidate_content = candidate.content
1073
+
1074
+ # Raise if the request failed because of a malformed function call
1075
+ if hasattr(candidate, "finish_reason") and candidate.finish_reason:
1076
+ if candidate.finish_reason == GeminiFinishReason.MALFORMED_FUNCTION_CALL.value:
1077
+ if kwargs.get("retrying_with_guidance") is True:
1078
+ pass
1079
+ raise RetryableModelProviderError(retry_guidance_message=MALFORMED_FUNCTION_CALL_GUIDANCE)
1080
+
1027
1081
  response_message: Content = Content(role="model", parts=[])
1028
1082
  if candidate_content is not None:
1029
1083
  response_message = candidate_content
@@ -0,0 +1,22 @@
1
+ from enum import Enum
2
+
3
+
4
+ class GeminiFinishReason(Enum):
5
+ """Gemini API finish reasons"""
6
+
7
+ STOP = "STOP"
8
+ MAX_TOKENS = "MAX_TOKENS"
9
+ SAFETY = "SAFETY"
10
+ RECITATION = "RECITATION"
11
+ MALFORMED_FUNCTION_CALL = "MALFORMED_FUNCTION_CALL"
12
+ OTHER = "OTHER"
13
+
14
+
15
+ # Guidance message used to retry a Gemini invocation after a MALFORMED_FUNCTION_CALL error
16
+ MALFORMED_FUNCTION_CALL_GUIDANCE = """The previous function call was malformed. Please try again with a valid function call.
17
+
18
+ Guidelines:
19
+ - Generate the function call JSON directly, do not generate code
20
+ - Use the function name exactly as defined (no namespace prefixes like 'default_api.')
21
+ - Ensure all required parameters are provided with correct types
22
+ """
agno/models/message.py CHANGED
@@ -112,6 +112,8 @@ class Message(BaseModel):
112
112
  references: Optional[MessageReferences] = None
113
113
  # The Unix timestamp the message was created.
114
114
  created_at: int = Field(default_factory=lambda: int(time()))
115
+ # When True, the message will be sent to the Model but not persisted afterwards.
116
+ temporary: bool = False
115
117
 
116
118
  model_config = ConfigDict(extra="allow", populate_by_name=True, arbitrary_types_allowed=True)
117
119
 
@@ -814,6 +814,8 @@ class OpenAIChat(Model):
814
814
 
815
815
  if hasattr(response_message, "reasoning_content") and response_message.reasoning_content is not None: # type: ignore
816
816
  model_response.reasoning_content = response_message.reasoning_content # type: ignore
817
+ elif hasattr(response_message, "reasoning") and response_message.reasoning is not None: # type: ignore
818
+ model_response.reasoning_content = response_message.reasoning # type: ignore
817
819
 
818
820
  if response.usage is not None:
819
821
  model_response.response_usage = self._get_metrics(response.usage)
@@ -846,6 +848,8 @@ class OpenAIChat(Model):
846
848
 
847
849
  if hasattr(choice_delta, "reasoning_content") and choice_delta.reasoning_content is not None:
848
850
  model_response.reasoning_content = choice_delta.reasoning_content
851
+ elif hasattr(choice_delta, "reasoning") and choice_delta.reasoning is not None:
852
+ model_response.reasoning_content = choice_delta.reasoning
849
853
 
850
854
  # Add audio if present
851
855
  if hasattr(choice_delta, "audio") and choice_delta.audio is not None:
@@ -307,6 +307,8 @@ class OpenAIResponses(Model):
307
307
 
308
308
  def _upload_file(self, file: File) -> Optional[str]:
309
309
  """Upload a file to the OpenAI vector database."""
310
+ from pathlib import Path
311
+ from urllib.parse import urlparse
310
312
 
311
313
  if file.url is not None:
312
314
  file_content_tuple = file.file_url_content
@@ -314,13 +316,12 @@ class OpenAIResponses(Model):
314
316
  file_content = file_content_tuple[0]
315
317
  else:
316
318
  return None
317
- file_name = file.url.split("/")[-1]
319
+ file_name = Path(urlparse(file.url).path).name or "file"
318
320
  file_tuple = (file_name, file_content)
319
321
  result = self.get_client().files.create(file=file_tuple, purpose="assistants")
320
322
  return result.id
321
323
  elif file.filepath is not None:
322
324
  import mimetypes
323
- from pathlib import Path
324
325
 
325
326
  file_path = file.filepath if isinstance(file.filepath, Path) else Path(file.filepath)
326
327
  if file_path.exists() and file_path.is_file():