agno 2.3.2__py3-none-any.whl → 2.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. agno/agent/agent.py +513 -185
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +66 -52
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/singlestore/singlestore.py +99 -108
  18. agno/db/sqlite/async_sqlite.py +29 -27
  19. agno/db/sqlite/sqlite.py +30 -26
  20. agno/knowledge/reader/pdf_reader.py +2 -2
  21. agno/knowledge/reader/tavily_reader.py +0 -1
  22. agno/memory/__init__.py +14 -1
  23. agno/memory/manager.py +217 -4
  24. agno/memory/strategies/__init__.py +15 -0
  25. agno/memory/strategies/base.py +67 -0
  26. agno/memory/strategies/summarize.py +196 -0
  27. agno/memory/strategies/types.py +37 -0
  28. agno/models/aimlapi/aimlapi.py +18 -0
  29. agno/models/anthropic/claude.py +87 -81
  30. agno/models/aws/bedrock.py +38 -16
  31. agno/models/aws/claude.py +97 -277
  32. agno/models/azure/ai_foundry.py +8 -4
  33. agno/models/base.py +101 -14
  34. agno/models/cerebras/cerebras.py +25 -9
  35. agno/models/cerebras/cerebras_openai.py +22 -2
  36. agno/models/cohere/chat.py +18 -6
  37. agno/models/cometapi/cometapi.py +19 -1
  38. agno/models/deepinfra/deepinfra.py +19 -1
  39. agno/models/fireworks/fireworks.py +19 -1
  40. agno/models/google/gemini.py +583 -21
  41. agno/models/groq/groq.py +23 -6
  42. agno/models/huggingface/huggingface.py +22 -7
  43. agno/models/ibm/watsonx.py +21 -7
  44. agno/models/internlm/internlm.py +19 -1
  45. agno/models/langdb/langdb.py +10 -0
  46. agno/models/litellm/chat.py +17 -7
  47. agno/models/litellm/litellm_openai.py +19 -1
  48. agno/models/message.py +19 -5
  49. agno/models/meta/llama.py +25 -5
  50. agno/models/meta/llama_openai.py +18 -0
  51. agno/models/mistral/mistral.py +13 -5
  52. agno/models/nvidia/nvidia.py +19 -1
  53. agno/models/ollama/chat.py +17 -6
  54. agno/models/openai/chat.py +22 -7
  55. agno/models/openai/responses.py +28 -10
  56. agno/models/openrouter/openrouter.py +20 -0
  57. agno/models/perplexity/perplexity.py +17 -0
  58. agno/models/requesty/requesty.py +18 -0
  59. agno/models/sambanova/sambanova.py +19 -1
  60. agno/models/siliconflow/siliconflow.py +19 -1
  61. agno/models/together/together.py +19 -1
  62. agno/models/vercel/v0.py +19 -1
  63. agno/models/vertexai/claude.py +99 -5
  64. agno/models/xai/xai.py +18 -0
  65. agno/os/interfaces/agui/router.py +1 -0
  66. agno/os/interfaces/agui/utils.py +97 -57
  67. agno/os/router.py +16 -0
  68. agno/os/routers/memory/memory.py +143 -0
  69. agno/os/routers/memory/schemas.py +26 -0
  70. agno/os/schema.py +33 -6
  71. agno/os/utils.py +134 -10
  72. agno/run/base.py +2 -1
  73. agno/run/workflow.py +1 -1
  74. agno/team/team.py +566 -219
  75. agno/tools/mcp/mcp.py +1 -1
  76. agno/utils/agent.py +119 -1
  77. agno/utils/models/ai_foundry.py +9 -2
  78. agno/utils/models/claude.py +12 -5
  79. agno/utils/models/cohere.py +9 -2
  80. agno/utils/models/llama.py +9 -2
  81. agno/utils/models/mistral.py +4 -2
  82. agno/utils/print_response/agent.py +37 -2
  83. agno/utils/print_response/team.py +52 -0
  84. agno/utils/tokens.py +41 -0
  85. agno/workflow/types.py +2 -2
  86. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
  87. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
  88. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
  89. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
  90. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/together/together.py CHANGED
@@ -1,7 +1,8 @@
1
1
  from dataclasses import dataclass, field
2
2
  from os import getenv
3
- from typing import Optional
3
+ from typing import Any, Dict, Optional
4
4
 
5
+ from agno.exceptions import ModelProviderError
5
6
  from agno.models.openai.like import OpenAILike
6
7
 
7
8
 
@@ -23,3 +24,20 @@ class Together(OpenAILike):
23
24
  provider: str = "Together"
24
25
  api_key: Optional[str] = field(default_factory=lambda: getenv("TOGETHER_API_KEY"))
25
26
  base_url: str = "https://api.together.xyz/v1"
27
+
28
+ def _get_client_params(self) -> Dict[str, Any]:
29
+ """
30
+ Returns client parameters for API requests, checking for TOGETHER_API_KEY.
31
+
32
+ Returns:
33
+ Dict[str, Any]: A dictionary of client parameters for API requests.
34
+ """
35
+ if not self.api_key:
36
+ self.api_key = getenv("TOGETHER_API_KEY")
37
+ if not self.api_key:
38
+ raise ModelProviderError(
39
+ message="TOGETHER_API_KEY not set. Please set the TOGETHER_API_KEY environment variable.",
40
+ model_name=self.name,
41
+ model_id=self.id,
42
+ )
43
+ return super()._get_client_params()
agno/models/vercel/v0.py CHANGED
@@ -1,7 +1,8 @@
1
1
  from dataclasses import dataclass, field
2
2
  from os import getenv
3
- from typing import Optional
3
+ from typing import Any, Dict, Optional
4
4
 
5
+ from agno.exceptions import ModelProviderError
5
6
  from agno.models.openai.like import OpenAILike
6
7
 
7
8
 
@@ -24,3 +25,20 @@ class V0(OpenAILike):
24
25
 
25
26
  api_key: Optional[str] = field(default_factory=lambda: getenv("V0_API_KEY"))
26
27
  base_url: str = "https://api.v0.dev/v1/"
28
+
29
+ def _get_client_params(self) -> Dict[str, Any]:
30
+ """
31
+ Returns client parameters for API requests, checking for V0_API_KEY.
32
+
33
+ Returns:
34
+ Dict[str, Any]: A dictionary of client parameters for API requests.
35
+ """
36
+ if not self.api_key:
37
+ self.api_key = getenv("V0_API_KEY")
38
+ if not self.api_key:
39
+ raise ModelProviderError(
40
+ message="V0_API_KEY not set. Please set the V0_API_KEY environment variable.",
41
+ model_name=self.name,
42
+ model_id=self.id,
43
+ )
44
+ return super()._get_client_params()
agno/models/vertexai/claude.py CHANGED
@@ -1,12 +1,14 @@
1
1
  from dataclasses import dataclass
2
2
  from os import getenv
3
- from typing import Any, Dict, Optional
3
+ from typing import Any, Dict, List, Optional, Type, Union
4
4
 
5
5
  import httpx
6
+ from pydantic import BaseModel
6
7
 
7
8
  from agno.models.anthropic import Claude as AnthropicClaude
8
9
  from agno.utils.http import get_default_async_client, get_default_sync_client
9
- from agno.utils.log import log_warning
10
+ from agno.utils.log import log_debug, log_warning
11
+ from agno.utils.models.claude import format_tools_for_model
10
12
 
11
13
  try:
12
14
  from anthropic import AnthropicVertex, AsyncAnthropicVertex
@@ -26,14 +28,23 @@ class Claude(AnthropicClaude):
26
28
  name: str = "Claude"
27
29
  provider: str = "VertexAI"
28
30
 
29
- client: Optional[AnthropicVertex] = None # type: ignore
30
- async_client: Optional[AsyncAnthropicVertex] = None # type: ignore
31
-
32
31
  # Client parameters
33
32
  region: Optional[str] = None
34
33
  project_id: Optional[str] = None
35
34
  base_url: Optional[str] = None
36
35
 
36
+ client: Optional[AnthropicVertex] = None # type: ignore
37
+ async_client: Optional[AsyncAnthropicVertex] = None # type: ignore
38
+
39
+ def __post_init__(self):
40
+ """Validate model configuration after initialization"""
41
+ # Validate thinking support immediately at model creation
42
+ if self.thinking:
43
+ self._validate_thinking_support()
44
+ # Overwrite output schema support for VertexAI Claude
45
+ self.supports_native_structured_outputs = False
46
+ self.supports_json_schema_outputs = False
47
+
37
48
  def _get_client_params(self) -> Dict[str, Any]:
38
49
  client_params: Dict[str, Any] = {}
39
50
 
@@ -94,3 +105,86 @@ class Claude(AnthropicClaude):
94
105
  _client_params["http_client"] = get_default_async_client()
95
106
  self.async_client = AsyncAnthropicVertex(**_client_params)
96
107
  return self.async_client
108
+
109
+ def get_request_params(
110
+ self,
111
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
112
+ tools: Optional[List[Dict[str, Any]]] = None,
113
+ ) -> Dict[str, Any]:
114
+ """
115
+ Generate keyword arguments for API requests.
116
+
117
+ Returns:
118
+ Dict[str, Any]: The keyword arguments for API requests.
119
+ """
120
+ # Validate thinking support if thinking is enabled
121
+ if self.thinking:
122
+ self._validate_thinking_support()
123
+
124
+ _request_params: Dict[str, Any] = {}
125
+ if self.max_tokens:
126
+ _request_params["max_tokens"] = self.max_tokens
127
+ if self.thinking:
128
+ _request_params["thinking"] = self.thinking
129
+ if self.temperature:
130
+ _request_params["temperature"] = self.temperature
131
+ if self.stop_sequences:
132
+ _request_params["stop_sequences"] = self.stop_sequences
133
+ if self.top_p:
134
+ _request_params["top_p"] = self.top_p
135
+ if self.top_k:
136
+ _request_params["top_k"] = self.top_k
137
+ if self.timeout:
138
+ _request_params["timeout"] = self.timeout
139
+
140
+ # Build betas list - include existing betas and add new one if needed
141
+ betas_list = list(self.betas) if self.betas else []
142
+
143
+ # Include betas if any are present
144
+ if betas_list:
145
+ _request_params["betas"] = betas_list
146
+
147
+ if self.request_params:
148
+ _request_params.update(self.request_params)
149
+
150
+ if _request_params:
151
+ log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
152
+ return _request_params
153
+
154
+ def _prepare_request_kwargs(
155
+ self,
156
+ system_message: str,
157
+ tools: Optional[List[Dict[str, Any]]] = None,
158
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
159
+ ) -> Dict[str, Any]:
160
+ """
161
+ Prepare the request keyword arguments for the API call.
162
+
163
+ Args:
164
+ system_message (str): The concatenated system messages.
165
+ tools: Optional list of tools
166
+ response_format: Optional response format (Pydantic model or dict)
167
+
168
+ Returns:
169
+ Dict[str, Any]: The request keyword arguments.
170
+ """
171
+ # Pass response_format and tools to get_request_params for beta header handling
172
+ request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
173
+ if system_message:
174
+ if self.cache_system_prompt:
175
+ cache_control = (
176
+ {"type": "ephemeral", "ttl": "1h"}
177
+ if self.extended_cache_time is not None and self.extended_cache_time is True
178
+ else {"type": "ephemeral"}
179
+ )
180
+ request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
181
+ else:
182
+ request_kwargs["system"] = [{"text": system_message, "type": "text"}]
183
+
184
+ # Format tools (this will handle strict mode)
185
+ if tools:
186
+ request_kwargs["tools"] = format_tools_for_model(tools)
187
+
188
+ if request_kwargs:
189
+ log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
190
+ return request_kwargs
agno/models/xai/xai.py CHANGED
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Type, Union
4
4
 
5
5
  from pydantic import BaseModel
6
6
 
7
+ from agno.exceptions import ModelProviderError
7
8
  from agno.models.message import Citations, UrlCitation
8
9
  from agno.models.openai.like import OpenAILike
9
10
  from agno.models.response import ModelResponse
@@ -39,6 +40,23 @@ class xAI(OpenAILike):
39
40
 
40
41
  search_parameters: Optional[Dict[str, Any]] = None
41
42
 
43
+ def _get_client_params(self) -> Dict[str, Any]:
44
+ """
45
+ Returns client parameters for API requests, checking for XAI_API_KEY.
46
+
47
+ Returns:
48
+ Dict[str, Any]: A dictionary of client parameters for API requests.
49
+ """
50
+ if not self.api_key:
51
+ self.api_key = getenv("XAI_API_KEY")
52
+ if not self.api_key:
53
+ raise ModelProviderError(
54
+ message="XAI_API_KEY not set. Please set the XAI_API_KEY environment variable.",
55
+ model_name=self.name,
56
+ model_id=self.id,
57
+ )
58
+ return super()._get_client_params()
59
+
42
60
  def get_request_params(
43
61
  self,
44
62
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
agno/os/interfaces/agui/router.py CHANGED
@@ -33,6 +33,7 @@ async def run_agent(agent: Agent, run_input: RunAgentInput) -> AsyncIterator[Bas
33
33
  try:
34
34
  # Preparing the input for the Agent and emitting the run started event
35
35
  messages = convert_agui_messages_to_agno_messages(run_input.messages or [])
36
+
36
37
  yield RunStartedEvent(type=EventType.RUN_STARTED, thread_id=run_input.thread_id, run_id=run_id)
37
38
 
38
39
  # Look for user_id in run_input.forwarded_props
agno/os/interfaces/agui/utils.py CHANGED
@@ -28,7 +28,7 @@ from agno.models.message import Message
28
28
  from agno.run.agent import RunContentEvent, RunEvent, RunOutputEvent, RunPausedEvent
29
29
  from agno.run.team import RunContentEvent as TeamRunContentEvent
30
30
  from agno.run.team import TeamRunEvent, TeamRunOutputEvent
31
- from agno.utils.log import log_warning
31
+ from agno.utils.log import log_debug, log_warning
32
32
  from agno.utils.message import get_text_from_message
33
33
 
34
34
 
@@ -116,23 +116,43 @@ class EventBuffer:
116
116
 
117
117
  def convert_agui_messages_to_agno_messages(messages: List[AGUIMessage]) -> List[Message]:
118
118
  """Convert AG-UI messages to Agno messages."""
119
- result = []
119
+ # First pass: collect all tool_call_ids that have results
120
+ tool_call_ids_with_results: Set[str] = set()
121
+ for msg in messages:
122
+ if msg.role == "tool" and msg.tool_call_id:
123
+ tool_call_ids_with_results.add(msg.tool_call_id)
124
+
125
+ # Second pass: convert messages
126
+ result: List[Message] = []
127
+ seen_tool_call_ids: Set[str] = set()
128
+
120
129
  for msg in messages:
121
130
  if msg.role == "tool":
131
+ # Deduplicate tool results - keep only first occurrence
132
+ if msg.tool_call_id in seen_tool_call_ids:
133
+ log_debug(f"Skipping duplicate AGUI tool result: {msg.tool_call_id}")
134
+ continue
135
+ seen_tool_call_ids.add(msg.tool_call_id)
122
136
  result.append(Message(role="tool", tool_call_id=msg.tool_call_id, content=msg.content))
137
+
123
138
  elif msg.role == "assistant":
124
139
  tool_calls = None
125
140
  if msg.tool_calls:
126
- tool_calls = [call.model_dump() for call in msg.tool_calls]
127
- result.append(
128
- Message(
129
- role="assistant",
130
- content=msg.content,
131
- tool_calls=tool_calls,
132
- )
133
- )
141
+ # Filter tool_calls to only those with results in this message sequence
142
+ filtered_calls = [call for call in msg.tool_calls if call.id in tool_call_ids_with_results]
143
+ if filtered_calls:
144
+ tool_calls = [call.model_dump() for call in filtered_calls]
145
+ result.append(Message(role="assistant", content=msg.content, tool_calls=tool_calls))
146
+
134
147
  elif msg.role == "user":
135
148
  result.append(Message(role="user", content=msg.content))
149
+
150
+ elif msg.role == "system":
151
+ pass # Skip - agent builds its own system message from configuration
152
+
153
+ else:
154
+ log_warning(f"Unknown AGUI message role: {msg.role}")
155
+
136
156
  return result
137
157
 
138
158
 
@@ -250,7 +270,25 @@ def _create_events_from_chunk(
250
270
  parent_message_id = event_buffer.get_parent_message_id_for_tool_call()
251
271
 
252
272
  if not parent_message_id:
253
- parent_message_id = current_message_id
273
+ # Create parent message for tool calls without preceding assistant message
274
+ parent_message_id = str(uuid.uuid4())
275
+
276
+ # Emit a text message to serve as the parent
277
+ text_start = TextMessageStartEvent(
278
+ type=EventType.TEXT_MESSAGE_START,
279
+ message_id=parent_message_id,
280
+ role="assistant",
281
+ )
282
+ events_to_emit.append(text_start)
283
+
284
+ text_end = TextMessageEndEvent(
285
+ type=EventType.TEXT_MESSAGE_END,
286
+ message_id=parent_message_id,
287
+ )
288
+ events_to_emit.append(text_end)
289
+
290
+ # Set this as the pending parent for subsequent tool calls in this batch
291
+ event_buffer.set_pending_tool_calls_parent_id(parent_message_id)
254
292
 
255
293
  start_event = ToolCallStartEvent(
256
294
  type=EventType.TOOL_CALL_START,
@@ -341,58 +379,60 @@ def _create_completion_events(
341
379
  end_message_event = TextMessageEndEvent(type=EventType.TEXT_MESSAGE_END, message_id=message_id)
342
380
  events_to_emit.append(end_message_event)
343
381
 
344
- # emit frontend tool calls, i.e. external_execution=True
345
- if isinstance(chunk, RunPausedEvent) and chunk.tools is not None:
346
- # First, emit an assistant message for external tool calls
347
- assistant_message_id = str(uuid.uuid4())
348
- assistant_start_event = TextMessageStartEvent(
349
- type=EventType.TEXT_MESSAGE_START,
350
- message_id=assistant_message_id,
351
- role="assistant",
352
- )
353
- events_to_emit.append(assistant_start_event)
354
-
355
- # Add any text content if present for the assistant message
356
- if chunk.content:
357
- content_event = TextMessageContentEvent(
358
- type=EventType.TEXT_MESSAGE_CONTENT,
382
+ # Emit external execution tools
383
+ if isinstance(chunk, RunPausedEvent):
384
+ external_tools = chunk.tools_awaiting_external_execution
385
+ if external_tools:
386
+ # First, emit an assistant message for external tool calls
387
+ assistant_message_id = str(uuid.uuid4())
388
+ assistant_start_event = TextMessageStartEvent(
389
+ type=EventType.TEXT_MESSAGE_START,
359
390
  message_id=assistant_message_id,
360
- delta=str(chunk.content),
391
+ role="assistant",
361
392
  )
362
- events_to_emit.append(content_event)
363
-
364
- # End the assistant message
365
- assistant_end_event = TextMessageEndEvent(
366
- type=EventType.TEXT_MESSAGE_END,
367
- message_id=assistant_message_id,
368
- )
369
- events_to_emit.append(assistant_end_event)
370
-
371
- # Now emit the tool call events with the assistant message as parent
372
- for tool in chunk.tools:
373
- if tool.tool_call_id is None or tool.tool_name is None:
374
- continue
393
+ events_to_emit.append(assistant_start_event)
394
+
395
+ # Add any text content if present for the assistant message
396
+ if chunk.content:
397
+ content_event = TextMessageContentEvent(
398
+ type=EventType.TEXT_MESSAGE_CONTENT,
399
+ message_id=assistant_message_id,
400
+ delta=str(chunk.content),
401
+ )
402
+ events_to_emit.append(content_event)
375
403
 
376
- start_event = ToolCallStartEvent(
377
- type=EventType.TOOL_CALL_START,
378
- tool_call_id=tool.tool_call_id,
379
- tool_call_name=tool.tool_name,
380
- parent_message_id=assistant_message_id, # Use the assistant message as parent
404
+ # End the assistant message
405
+ assistant_end_event = TextMessageEndEvent(
406
+ type=EventType.TEXT_MESSAGE_END,
407
+ message_id=assistant_message_id,
381
408
  )
382
- events_to_emit.append(start_event)
409
+ events_to_emit.append(assistant_end_event)
410
+
411
+ # Emit tool call events for external execution
412
+ for tool in external_tools:
413
+ if tool.tool_call_id is None or tool.tool_name is None:
414
+ continue
415
+
416
+ start_event = ToolCallStartEvent(
417
+ type=EventType.TOOL_CALL_START,
418
+ tool_call_id=tool.tool_call_id,
419
+ tool_call_name=tool.tool_name,
420
+ parent_message_id=assistant_message_id, # Use the assistant message as parent
421
+ )
422
+ events_to_emit.append(start_event)
383
423
 
384
- args_event = ToolCallArgsEvent(
385
- type=EventType.TOOL_CALL_ARGS,
386
- tool_call_id=tool.tool_call_id,
387
- delta=json.dumps(tool.tool_args),
388
- )
389
- events_to_emit.append(args_event)
424
+ args_event = ToolCallArgsEvent(
425
+ type=EventType.TOOL_CALL_ARGS,
426
+ tool_call_id=tool.tool_call_id,
427
+ delta=json.dumps(tool.tool_args),
428
+ )
429
+ events_to_emit.append(args_event)
390
430
 
391
- end_event = ToolCallEndEvent(
392
- type=EventType.TOOL_CALL_END,
393
- tool_call_id=tool.tool_call_id,
394
- )
395
- events_to_emit.append(end_event)
431
+ end_event = ToolCallEndEvent(
432
+ type=EventType.TOOL_CALL_END,
433
+ tool_call_id=tool.tool_call_id,
434
+ )
435
+ events_to_emit.append(end_event)
396
436
 
397
437
  run_finished_event = RunFinishedEvent(type=EventType.RUN_FINISHED, thread_id=thread_id, run_id=run_id)
398
438
  events_to_emit.append(run_finished_event)
agno/os/router.py CHANGED
@@ -139,6 +139,22 @@ async def _get_request_kwargs(request: Request, endpoint_func: Callable) -> Dict
139
139
  kwargs.pop("knowledge_filters")
140
140
  log_warning(f"Invalid FilterExpr in knowledge_filters: {e}")
141
141
 
142
+ # Handle output_schema - convert JSON schema to dynamic Pydantic model
143
+ if output_schema := kwargs.get("output_schema"):
144
+ try:
145
+ if isinstance(output_schema, str):
146
+ from agno.os.utils import json_schema_to_pydantic_model
147
+
148
+ schema_dict = json.loads(output_schema)
149
+ dynamic_model = json_schema_to_pydantic_model(schema_dict)
150
+ kwargs["output_schema"] = dynamic_model
151
+ except json.JSONDecodeError:
152
+ kwargs.pop("output_schema")
153
+ log_warning(f"Invalid output_schema JSON: {output_schema}")
154
+ except Exception as e:
155
+ kwargs.pop("output_schema")
156
+ log_warning(f"Failed to create output_schema model: {e}")
157
+
142
158
  # Parse boolean and null values
143
159
  for key, value in kwargs.items():
144
160
  if isinstance(value, str) and value.lower() in ["true", "false"]:
agno/os/routers/memory/memory.py CHANGED
@@ -8,9 +8,12 @@ from fastapi.routing import APIRouter
8
8
 
9
9
  from agno.db.base import AsyncBaseDb, BaseDb
10
10
  from agno.db.schemas import UserMemory
11
+ from agno.models.utils import get_model
11
12
  from agno.os.auth import get_authentication_dependency
12
13
  from agno.os.routers.memory.schemas import (
13
14
  DeleteMemoriesRequest,
15
+ OptimizeMemoriesRequest,
16
+ OptimizeMemoriesResponse,
14
17
  UserMemoryCreateSchema,
15
18
  UserMemorySchema,
16
19
  UserStatsSchema,
@@ -497,6 +500,146 @@ def attach_routes(router: APIRouter, dbs: dict[str, list[Union[BaseDb, AsyncBase
497
500
  except Exception as e:
498
501
  raise HTTPException(status_code=500, detail=f"Failed to get user statistics: {str(e)}")
499
502
 
503
+ @router.post(
504
+ "/optimize-memories",
505
+ response_model=OptimizeMemoriesResponse,
506
+ status_code=200,
507
+ operation_id="optimize_memories",
508
+ summary="Optimize User Memories",
509
+ description=(
510
+ "Optimize all memories for a given user using the default summarize strategy. "
511
+ "This operation combines all memories into a single comprehensive summary, "
512
+ "achieving maximum token reduction while preserving all key information. "
513
+ "To use a custom model, specify the model parameter in 'provider:model_id' format "
514
+ "(e.g., 'openai:gpt-4o-mini', 'anthropic:claude-3-5-sonnet-20241022'). "
515
+ "If not specified, uses MemoryManager's default model (gpt-4o). "
516
+ "Set apply=false to preview optimization results without saving to database."
517
+ ),
518
+ responses={
519
+ 200: {
520
+ "description": "Memories optimized successfully",
521
+ "content": {
522
+ "application/json": {
523
+ "example": {
524
+ "memories": [
525
+ {
526
+ "memory_id": "f9361a69-2997-40c7-ae4e-a5861d434047",
527
+ "memory": "User has a 3-year-old golden retriever named Max who loves fetch and walks. Lives in San Francisco's Mission district, works as a product manager in tech. Enjoys hiking Bay Area trails, trying new restaurants (especially Japanese, Thai, Mexican), and learning piano for 1.5 years.",
528
+ "topics": ["pets", "location", "work", "hobbies", "food_preferences"],
529
+ "user_id": "user2",
530
+ "updated_at": "2025-11-18T10:30:00Z",
531
+ }
532
+ ],
533
+ "memories_before": 4,
534
+ "memories_after": 1,
535
+ "tokens_before": 450,
536
+ "tokens_after": 180,
537
+ "tokens_saved": 270,
538
+ "reduction_percentage": 60.0,
539
+ }
540
+ }
541
+ },
542
+ },
543
+ 400: {
544
+ "description": "Bad request - User ID is required or invalid model string format",
545
+ "model": BadRequestResponse,
546
+ },
547
+ 404: {"description": "No memories found for user", "model": NotFoundResponse},
548
+ 500: {"description": "Failed to optimize memories", "model": InternalServerErrorResponse},
549
+ },
550
+ )
551
+ async def optimize_memories(
552
+ request: OptimizeMemoriesRequest,
553
+ db_id: Optional[str] = Query(default=None, description="Database ID to use for optimization"),
554
+ table: Optional[str] = Query(default=None, description="Table to use for optimization"),
555
+ ) -> OptimizeMemoriesResponse:
556
+ """Optimize user memories using the default summarize strategy."""
557
+ from agno.memory import MemoryManager
558
+ from agno.memory.strategies.types import MemoryOptimizationStrategyType
559
+
560
+ try:
561
+ # Get database instance
562
+ db = await get_db(dbs, db_id, table)
563
+
564
+ # Create memory manager with optional model
565
+ if request.model:
566
+ try:
567
+ model_instance = get_model(request.model)
568
+ except ValueError as e:
569
+ raise HTTPException(status_code=400, detail=str(e))
570
+ memory_manager = MemoryManager(model=model_instance, db=db)
571
+ else:
572
+ # No model specified - use MemoryManager's default
573
+ memory_manager = MemoryManager(db=db)
574
+
575
+ # Get current memories to count tokens before optimization
576
+ if isinstance(db, AsyncBaseDb):
577
+ memories_before = await memory_manager.aget_user_memories(user_id=request.user_id)
578
+ else:
579
+ memories_before = memory_manager.get_user_memories(user_id=request.user_id)
580
+
581
+ if not memories_before:
582
+ raise HTTPException(status_code=404, detail=f"No memories found for user {request.user_id}")
583
+
584
+ # Count tokens before optimization
585
+ from agno.memory.strategies.summarize import SummarizeStrategy
586
+
587
+ strategy = SummarizeStrategy()
588
+ tokens_before = strategy.count_tokens(memories_before)
589
+ memories_before_count = len(memories_before)
590
+
591
+ # Optimize memories with default SUMMARIZE strategy
592
+ if isinstance(db, AsyncBaseDb):
593
+ optimized_memories = await memory_manager.aoptimize_memories(
594
+ user_id=request.user_id,
595
+ strategy=MemoryOptimizationStrategyType.SUMMARIZE,
596
+ apply=request.apply,
597
+ )
598
+ else:
599
+ optimized_memories = memory_manager.optimize_memories(
600
+ user_id=request.user_id,
601
+ strategy=MemoryOptimizationStrategyType.SUMMARIZE,
602
+ apply=request.apply,
603
+ )
604
+
605
+ # Count tokens after optimization
606
+ tokens_after = strategy.count_tokens(optimized_memories)
607
+ memories_after_count = len(optimized_memories)
608
+
609
+ # Calculate statistics
610
+ tokens_saved = tokens_before - tokens_after
611
+ reduction_percentage = (tokens_saved / tokens_before * 100.0) if tokens_before > 0 else 0.0
612
+
613
+ # Convert to schema objects
614
+ optimized_memory_schemas = [
615
+ UserMemorySchema(
616
+ memory_id=mem.memory_id or "",
617
+ memory=mem.memory or "",
618
+ topics=mem.topics,
619
+ agent_id=mem.agent_id,
620
+ team_id=mem.team_id,
621
+ user_id=mem.user_id,
622
+ updated_at=mem.updated_at,
623
+ )
624
+ for mem in optimized_memories
625
+ ]
626
+
627
+ return OptimizeMemoriesResponse(
628
+ memories=optimized_memory_schemas,
629
+ memories_before=memories_before_count,
630
+ memories_after=memories_after_count,
631
+ tokens_before=tokens_before,
632
+ tokens_after=tokens_after,
633
+ tokens_saved=tokens_saved,
634
+ reduction_percentage=reduction_percentage,
635
+ )
636
+
637
+ except HTTPException:
638
+ raise
639
+ except Exception as e:
640
+ logger.error(f"Failed to optimize memories for user {request.user_id}: {str(e)}")
641
+ raise HTTPException(status_code=500, detail=f"Failed to optimize memories: {str(e)}")
642
+
500
643
  return router
501
644
 
502
645
 
agno/os/routers/memory/schemas.py CHANGED
@@ -60,3 +60,29 @@ class UserStatsSchema(BaseModel):
60
60
  total_memories=user_stats_dict["total_memories"],
61
61
  last_memory_updated_at=datetime.fromtimestamp(updated_at, tz=timezone.utc) if updated_at else None,
62
62
  )
63
+
64
+
65
+ class OptimizeMemoriesRequest(BaseModel):
66
+ """Schema for memory optimization request"""
67
+
68
+ user_id: str = Field(..., description="User ID to optimize memories for")
69
+ model: Optional[str] = Field(
70
+ default=None,
71
+ description="Model to use for optimization in format 'provider:model_id' (e.g., 'openai:gpt-4o-mini', 'anthropic:claude-3-5-sonnet-20241022', 'google:gemini-2.0-flash-exp'). If not specified, uses MemoryManager's default model (gpt-4o).",
72
+ )
73
+ apply: bool = Field(
74
+ default=True,
75
+ description="If True, apply optimization changes to database. If False, return preview only without saving.",
76
+ )
77
+
78
+
79
+ class OptimizeMemoriesResponse(BaseModel):
80
+ """Schema for memory optimization response"""
81
+
82
+ memories: List[UserMemorySchema] = Field(..., description="List of optimized memory objects")
83
+ memories_before: int = Field(..., description="Number of memories before optimization", ge=0)
84
+ memories_after: int = Field(..., description="Number of memories after optimization", ge=0)
85
+ tokens_before: int = Field(..., description="Token count before optimization", ge=0)
86
+ tokens_after: int = Field(..., description="Token count after optimization", ge=0)
87
+ tokens_saved: int = Field(..., description="Number of tokens saved through optimization", ge=0)
88
+ reduction_percentage: float = Field(..., description="Percentage of token reduction achieved", ge=0.0, le=100.0)