agno 2.3.2__py3-none-any.whl → 2.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +513 -185
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +66 -52
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +29 -27
- agno/db/sqlite/sqlite.py +30 -26
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +217 -4
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +18 -0
- agno/models/anthropic/claude.py +87 -81
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +25 -9
- agno/models/cerebras/cerebras_openai.py +22 -2
- agno/models/cohere/chat.py +18 -6
- agno/models/cometapi/cometapi.py +19 -1
- agno/models/deepinfra/deepinfra.py +19 -1
- agno/models/fireworks/fireworks.py +19 -1
- agno/models/google/gemini.py +583 -21
- agno/models/groq/groq.py +23 -6
- agno/models/huggingface/huggingface.py +22 -7
- agno/models/ibm/watsonx.py +21 -7
- agno/models/internlm/internlm.py +19 -1
- agno/models/langdb/langdb.py +10 -0
- agno/models/litellm/chat.py +17 -7
- agno/models/litellm/litellm_openai.py +19 -1
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +25 -5
- agno/models/meta/llama_openai.py +18 -0
- agno/models/mistral/mistral.py +13 -5
- agno/models/nvidia/nvidia.py +19 -1
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +22 -7
- agno/models/openai/responses.py +28 -10
- agno/models/openrouter/openrouter.py +20 -0
- agno/models/perplexity/perplexity.py +17 -0
- agno/models/requesty/requesty.py +18 -0
- agno/models/sambanova/sambanova.py +19 -1
- agno/models/siliconflow/siliconflow.py +19 -1
- agno/models/together/together.py +19 -1
- agno/models/vercel/v0.py +19 -1
- agno/models/vertexai/claude.py +99 -5
- agno/models/xai/xai.py +18 -0
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -0
- agno/os/routers/memory/memory.py +143 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +33 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +566 -219
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/together/together.py
CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -23,3 +24,20 @@ class Together(OpenAILike):
     provider: str = "Together"
     api_key: Optional[str] = field(default_factory=lambda: getenv("TOGETHER_API_KEY"))
     base_url: str = "https://api.together.xyz/v1"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for TOGETHER_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("TOGETHER_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="TOGETHER_API_KEY not set. Please set the TOGETHER_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
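With 2.3.4, a missing key now fails fast at client construction with a provider-specific error instead of surfacing later inside the OpenAI client. The same pattern is applied to V0 and xAI below. A minimal sketch of the new behavior, assuming agno 2.3.4 is installed and the key is unset (the model id is arbitrary):

```python
import os

from agno.exceptions import ModelProviderError
from agno.models.together import Together

os.environ.pop("TOGETHER_API_KEY", None)  # simulate a missing key

model = Together(id="meta-llama/Llama-3.3-70B-Instruct-Turbo")
try:
    model._get_client_params()  # 2.3.4 raises here, before any request is sent
except ModelProviderError as exc:
    print(exc)
```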
agno/models/vercel/v0.py
CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -24,3 +25,20 @@ class V0(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("V0_API_KEY"))
     base_url: str = "https://api.v0.dev/v1/"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for V0_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("V0_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="V0_API_KEY not set. Please set the V0_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
agno/models/vertexai/claude.py
CHANGED
@@ -1,12 +1,14 @@
 from dataclasses import dataclass
 from os import getenv
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Type, Union
 
 import httpx
+from pydantic import BaseModel
 
 from agno.models.anthropic import Claude as AnthropicClaude
 from agno.utils.http import get_default_async_client, get_default_sync_client
-from agno.utils.log import log_warning
+from agno.utils.log import log_debug, log_warning
+from agno.utils.models.claude import format_tools_for_model
 
 try:
     from anthropic import AnthropicVertex, AsyncAnthropicVertex
@@ -26,14 +28,23 @@ class Claude(AnthropicClaude):
     name: str = "Claude"
     provider: str = "VertexAI"
 
-    client: Optional[AnthropicVertex] = None  # type: ignore
-    async_client: Optional[AsyncAnthropicVertex] = None  # type: ignore
-
     # Client parameters
     region: Optional[str] = None
     project_id: Optional[str] = None
     base_url: Optional[str] = None
 
+    client: Optional[AnthropicVertex] = None  # type: ignore
+    async_client: Optional[AsyncAnthropicVertex] = None  # type: ignore
+
+    def __post_init__(self):
+        """Validate model configuration after initialization"""
+        # Validate thinking support immediately at model creation
+        if self.thinking:
+            self._validate_thinking_support()
+        # Overwrite output schema support for VertexAI Claude
+        self.supports_native_structured_outputs = False
+        self.supports_json_schema_outputs = False
+
     def _get_client_params(self) -> Dict[str, Any]:
         client_params: Dict[str, Any] = {}
 
@@ -94,3 +105,86 @@ class Claude(AnthropicClaude):
             _client_params["http_client"] = get_default_async_client()
         self.async_client = AsyncAnthropicVertex(**_client_params)
         return self.async_client
+
+    def get_request_params(
+        self,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Generate keyword arguments for API requests.
+
+        Returns:
+            Dict[str, Any]: The keyword arguments for API requests.
+        """
+        # Validate thinking support if thinking is enabled
+        if self.thinking:
+            self._validate_thinking_support()
+
+        _request_params: Dict[str, Any] = {}
+        if self.max_tokens:
+            _request_params["max_tokens"] = self.max_tokens
+        if self.thinking:
+            _request_params["thinking"] = self.thinking
+        if self.temperature:
+            _request_params["temperature"] = self.temperature
+        if self.stop_sequences:
+            _request_params["stop_sequences"] = self.stop_sequences
+        if self.top_p:
+            _request_params["top_p"] = self.top_p
+        if self.top_k:
+            _request_params["top_k"] = self.top_k
+        if self.timeout:
+            _request_params["timeout"] = self.timeout
+
+        # Build betas list - include existing betas and add new one if needed
+        betas_list = list(self.betas) if self.betas else []
+
+        # Include betas if any are present
+        if betas_list:
+            _request_params["betas"] = betas_list
+
+        if self.request_params:
+            _request_params.update(self.request_params)
+
+        if _request_params:
+            log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
+        return _request_params
+
+    def _prepare_request_kwargs(
+        self,
+        system_message: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Prepare the request keyword arguments for the API call.
+
+        Args:
+            system_message (str): The concatenated system messages.
+            tools: Optional list of tools
+            response_format: Optional response format (Pydantic model or dict)
+
+        Returns:
+            Dict[str, Any]: The request keyword arguments.
+        """
+        # Pass response_format and tools to get_request_params for beta header handling
+        request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
+        if system_message:
+            if self.cache_system_prompt:
+                cache_control = (
+                    {"type": "ephemeral", "ttl": "1h"}
+                    if self.extended_cache_time is not None and self.extended_cache_time is True
+                    else {"type": "ephemeral"}
+                )
+                request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
+            else:
+                request_kwargs["system"] = [{"text": system_message, "type": "text"}]
+
+        # Format tools (this will handle strict mode)
+        if tools:
+            request_kwargs["tools"] = format_tools_for_model(tools)
+
+        if request_kwargs:
+            log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
+        return request_kwargs
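Two behaviors in this block are easy to miss: `__post_init__` force-disables native structured outputs and JSON-schema outputs for VertexAI Claude at construction time, and `_prepare_request_kwargs` chooses between a 1-hour and a default ephemeral cache entry for the system prompt. A standalone sketch of that cache selection, with the field names taken from the diff and the values illustrative:

```python
from typing import Any, Dict, List, Optional


def build_system_blocks(
    system_message: str, cache_system_prompt: bool, extended_cache_time: Optional[bool]
) -> List[Dict[str, Any]]:
    # Mirrors the branch in _prepare_request_kwargs: the 1h TTL is used only
    # when extended_cache_time is exactly True.
    if cache_system_prompt:
        cache_control = {"type": "ephemeral", "ttl": "1h"} if extended_cache_time is True else {"type": "ephemeral"}
        return [{"text": system_message, "type": "text", "cache_control": cache_control}]
    return [{"text": system_message, "type": "text"}]


print(build_system_blocks("You are terse.", cache_system_prompt=True, extended_cache_time=True))
# [{'text': 'You are terse.', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '1h'}}]
```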
agno/models/xai/xai.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Type, Union
 
 from pydantic import BaseModel
 
+from agno.exceptions import ModelProviderError
 from agno.models.message import Citations, UrlCitation
 from agno.models.openai.like import OpenAILike
 from agno.models.response import ModelResponse
@@ -39,6 +40,23 @@ class xAI(OpenAILike):
 
     search_parameters: Optional[Dict[str, Any]] = None
 
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for XAI_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("XAI_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="XAI_API_KEY not set. Please set the XAI_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
agno/os/interfaces/agui/router.py
CHANGED

@@ -33,6 +33,7 @@ async def run_agent(agent: Agent, run_input: RunAgentInput) -> AsyncIterator[Bas
     try:
         # Preparing the input for the Agent and emitting the run started event
         messages = convert_agui_messages_to_agno_messages(run_input.messages or [])
+
         yield RunStartedEvent(type=EventType.RUN_STARTED, thread_id=run_input.thread_id, run_id=run_id)
 
         # Look for user_id in run_input.forwarded_props
agno/os/interfaces/agui/utils.py
CHANGED
@@ -28,7 +28,7 @@ from agno.models.message import Message
 from agno.run.agent import RunContentEvent, RunEvent, RunOutputEvent, RunPausedEvent
 from agno.run.team import RunContentEvent as TeamRunContentEvent
 from agno.run.team import TeamRunEvent, TeamRunOutputEvent
-from agno.utils.log import log_warning
+from agno.utils.log import log_debug, log_warning
 from agno.utils.message import get_text_from_message
 
 
@@ -116,23 +116,43 @@ class EventBuffer:
 
 def convert_agui_messages_to_agno_messages(messages: List[AGUIMessage]) -> List[Message]:
     """Convert AG-UI messages to Agno messages."""
-
+    # First pass: collect all tool_call_ids that have results
+    tool_call_ids_with_results: Set[str] = set()
+    for msg in messages:
+        if msg.role == "tool" and msg.tool_call_id:
+            tool_call_ids_with_results.add(msg.tool_call_id)
+
+    # Second pass: convert messages
+    result: List[Message] = []
+    seen_tool_call_ids: Set[str] = set()
+
     for msg in messages:
         if msg.role == "tool":
+            # Deduplicate tool results - keep only first occurrence
+            if msg.tool_call_id in seen_tool_call_ids:
+                log_debug(f"Skipping duplicate AGUI tool result: {msg.tool_call_id}")
+                continue
+            seen_tool_call_ids.add(msg.tool_call_id)
             result.append(Message(role="tool", tool_call_id=msg.tool_call_id, content=msg.content))
+
         elif msg.role == "assistant":
             tool_calls = None
             if msg.tool_calls:
-                tool_calls
-
-
-
-
-
-                )
-            )
+                # Filter tool_calls to only those with results in this message sequence
+                filtered_calls = [call for call in msg.tool_calls if call.id in tool_call_ids_with_results]
+                if filtered_calls:
+                    tool_calls = [call.model_dump() for call in filtered_calls]
+            result.append(Message(role="assistant", content=msg.content, tool_calls=tool_calls))
+
         elif msg.role == "user":
             result.append(Message(role="user", content=msg.content))
+
+        elif msg.role == "system":
+            pass  # Skip - agent builds its own system message from configuration
+
+        else:
+            log_warning(f"Unknown AGUI message role: {msg.role}")
+
     return result
 
 
@@ -250,7 +270,25 @@ def _create_events_from_chunk(
     parent_message_id = event_buffer.get_parent_message_id_for_tool_call()
 
     if not parent_message_id:
-
+        # Create parent message for tool calls without preceding assistant message
+        parent_message_id = str(uuid.uuid4())
+
+        # Emit a text message to serve as the parent
+        text_start = TextMessageStartEvent(
+            type=EventType.TEXT_MESSAGE_START,
+            message_id=parent_message_id,
+            role="assistant",
+        )
+        events_to_emit.append(text_start)
+
+        text_end = TextMessageEndEvent(
+            type=EventType.TEXT_MESSAGE_END,
+            message_id=parent_message_id,
+        )
+        events_to_emit.append(text_end)
+
+        # Set this as the pending parent for subsequent tool calls in this batch
+        event_buffer.set_pending_tool_calls_parent_id(parent_message_id)
 
     start_event = ToolCallStartEvent(
         type=EventType.TOOL_CALL_START,
@@ -341,58 +379,60 @@ def _create_completion_events(
     end_message_event = TextMessageEndEvent(type=EventType.TEXT_MESSAGE_END, message_id=message_id)
     events_to_emit.append(end_message_event)
 
-    #
-    if isinstance(chunk, RunPausedEvent)
-
-
-
-
-
-
-            )
-            events_to_emit.append(assistant_start_event)
-
-            # Add any text content if present for the assistant message
-            if chunk.content:
-                content_event = TextMessageContentEvent(
-                    type=EventType.TEXT_MESSAGE_CONTENT,
+    # Emit external execution tools
+    if isinstance(chunk, RunPausedEvent):
+        external_tools = chunk.tools_awaiting_external_execution
+        if external_tools:
+            # First, emit an assistant message for external tool calls
+            assistant_message_id = str(uuid.uuid4())
+            assistant_start_event = TextMessageStartEvent(
+                type=EventType.TEXT_MESSAGE_START,
                 message_id=assistant_message_id,
-
+                role="assistant",
             )
-            events_to_emit.append(
-
-
-
-
-
-
-
-
-
-            for tool in chunk.tools:
-                if tool.tool_call_id is None or tool.tool_name is None:
-                    continue
+            events_to_emit.append(assistant_start_event)
+
+            # Add any text content if present for the assistant message
+            if chunk.content:
+                content_event = TextMessageContentEvent(
+                    type=EventType.TEXT_MESSAGE_CONTENT,
+                    message_id=assistant_message_id,
+                    delta=str(chunk.content),
+                )
+                events_to_emit.append(content_event)
 
-
-
-
-
-                    parent_message_id=assistant_message_id,  # Use the assistant message as parent
+            # End the assistant message
+            assistant_end_event = TextMessageEndEvent(
+                type=EventType.TEXT_MESSAGE_END,
+                message_id=assistant_message_id,
             )
-            events_to_emit.append(
+            events_to_emit.append(assistant_end_event)
+
+            # Emit tool call events for external execution
+            for tool in external_tools:
+                if tool.tool_call_id is None or tool.tool_name is None:
+                    continue
+
+                start_event = ToolCallStartEvent(
+                    type=EventType.TOOL_CALL_START,
+                    tool_call_id=tool.tool_call_id,
+                    tool_call_name=tool.tool_name,
+                    parent_message_id=assistant_message_id,  # Use the assistant message as parent
+                )
+                events_to_emit.append(start_event)
 
-
-
-
-
-
+                args_event = ToolCallArgsEvent(
+                    type=EventType.TOOL_CALL_ARGS,
+                    tool_call_id=tool.tool_call_id,
+                    delta=json.dumps(tool.tool_args),
+                )
+                events_to_emit.append(args_event)
 
-
-
-
-
-
+                end_event = ToolCallEndEvent(
+                    type=EventType.TOOL_CALL_END,
+                    tool_call_id=tool.tool_call_id,
+                )
+                events_to_emit.append(end_event)
 
     run_finished_event = RunFinishedEvent(type=EventType.RUN_FINISHED, thread_id=thread_id, run_id=run_id)
     events_to_emit.append(run_finished_event)
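The conversion rewrite is effectively a two-pass repair of AG-UI history: assistant `tool_calls` that never received a tool result are dropped, and duplicate tool results keep only their first occurrence. A self-contained sketch of that logic on toy dict messages (the real code operates on AG-UI and Agno message objects):

```python
messages = [
    {"role": "assistant", "tool_calls": [{"id": "a"}, {"id": "b"}]},
    {"role": "tool", "tool_call_id": "a", "content": "ok"},
    {"role": "tool", "tool_call_id": "a", "content": "duplicate"},
]

# First pass: which tool calls actually have results?
ids_with_results = {m["tool_call_id"] for m in messages if m["role"] == "tool"}

# Second pass: convert, dropping orphans ("b") and duplicate results.
seen, converted = set(), []
for m in messages:
    if m["role"] == "tool":
        if m["tool_call_id"] in seen:
            continue  # duplicate tool result
        seen.add(m["tool_call_id"])
        converted.append(m)
    elif m["role"] == "assistant":
        calls = [c for c in m["tool_calls"] if c["id"] in ids_with_results]
        converted.append({"role": "assistant", "tool_calls": calls or None})

print(converted)
# [{'role': 'assistant', 'tool_calls': [{'id': 'a'}]},
#  {'role': 'tool', 'tool_call_id': 'a', 'content': 'ok'}]
```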
agno/os/router.py
CHANGED
@@ -139,6 +139,22 @@ async def _get_request_kwargs(request: Request, endpoint_func: Callable) -> Dict
             kwargs.pop("knowledge_filters")
             log_warning(f"Invalid FilterExpr in knowledge_filters: {e}")
 
+    # Handle output_schema - convert JSON schema to dynamic Pydantic model
+    if output_schema := kwargs.get("output_schema"):
+        try:
+            if isinstance(output_schema, str):
+                from agno.os.utils import json_schema_to_pydantic_model
+
+                schema_dict = json.loads(output_schema)
+                dynamic_model = json_schema_to_pydantic_model(schema_dict)
+                kwargs["output_schema"] = dynamic_model
+        except json.JSONDecodeError:
+            kwargs.pop("output_schema")
+            log_warning(f"Invalid output_schema JSON: {output_schema}")
+        except Exception as e:
+            kwargs.pop("output_schema")
+            log_warning(f"Failed to create output_schema model: {e}")
+
     # Parse boolean and null values
     for key, value in kwargs.items():
         if isinstance(value, str) and value.lower() in ["true", "false"]:
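With this change a run endpoint can accept `output_schema` as a JSON-schema string and convert it into a dynamic Pydantic model before the run starts. A small sketch of the conversion step, assuming the new `json_schema_to_pydantic_model` helper (part of the `agno/os/utils.py` additions) accepts a standard object schema; the schema itself is illustrative:

```python
import json

from agno.os.utils import json_schema_to_pydantic_model  # added in this release

raw = '{"title": "Person", "type": "object", "properties": {"name": {"type": "string"}}}'
DynamicModel = json_schema_to_pydantic_model(json.loads(raw))

# The run then validates structured output against DynamicModel, just as if a
# Pydantic class had been passed as output_schema directly.
print(DynamicModel.model_json_schema())
```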
agno/os/routers/memory/memory.py
CHANGED
@@ -8,9 +8,12 @@ from fastapi.routing import APIRouter
 
 from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas import UserMemory
+from agno.models.utils import get_model
 from agno.os.auth import get_authentication_dependency
 from agno.os.routers.memory.schemas import (
     DeleteMemoriesRequest,
+    OptimizeMemoriesRequest,
+    OptimizeMemoriesResponse,
     UserMemoryCreateSchema,
     UserMemorySchema,
     UserStatsSchema,
@@ -497,6 +500,146 @@ def attach_routes(router: APIRouter, dbs: dict[str, list[Union[BaseDb, AsyncBase
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Failed to get user statistics: {str(e)}")
 
+    @router.post(
+        "/optimize-memories",
+        response_model=OptimizeMemoriesResponse,
+        status_code=200,
+        operation_id="optimize_memories",
+        summary="Optimize User Memories",
+        description=(
+            "Optimize all memories for a given user using the default summarize strategy. "
+            "This operation combines all memories into a single comprehensive summary, "
+            "achieving maximum token reduction while preserving all key information. "
+            "To use a custom model, specify the model parameter in 'provider:model_id' format "
+            "(e.g., 'openai:gpt-4o-mini', 'anthropic:claude-3-5-sonnet-20241022'). "
+            "If not specified, uses MemoryManager's default model (gpt-4o). "
+            "Set apply=false to preview optimization results without saving to database."
+        ),
+        responses={
+            200: {
+                "description": "Memories optimized successfully",
+                "content": {
+                    "application/json": {
+                        "example": {
+                            "memories": [
+                                {
+                                    "memory_id": "f9361a69-2997-40c7-ae4e-a5861d434047",
+                                    "memory": "User has a 3-year-old golden retriever named Max who loves fetch and walks. Lives in San Francisco's Mission district, works as a product manager in tech. Enjoys hiking Bay Area trails, trying new restaurants (especially Japanese, Thai, Mexican), and learning piano for 1.5 years.",
+                                    "topics": ["pets", "location", "work", "hobbies", "food_preferences"],
+                                    "user_id": "user2",
+                                    "updated_at": "2025-11-18T10:30:00Z",
+                                }
+                            ],
+                            "memories_before": 4,
+                            "memories_after": 1,
+                            "tokens_before": 450,
+                            "tokens_after": 180,
+                            "tokens_saved": 270,
+                            "reduction_percentage": 60.0,
+                        }
+                    }
+                },
+            },
+            400: {
+                "description": "Bad request - User ID is required or invalid model string format",
+                "model": BadRequestResponse,
+            },
+            404: {"description": "No memories found for user", "model": NotFoundResponse},
+            500: {"description": "Failed to optimize memories", "model": InternalServerErrorResponse},
+        },
+    )
+    async def optimize_memories(
+        request: OptimizeMemoriesRequest,
+        db_id: Optional[str] = Query(default=None, description="Database ID to use for optimization"),
+        table: Optional[str] = Query(default=None, description="Table to use for optimization"),
+    ) -> OptimizeMemoriesResponse:
+        """Optimize user memories using the default summarize strategy."""
+        from agno.memory import MemoryManager
+        from agno.memory.strategies.types import MemoryOptimizationStrategyType
+
+        try:
+            # Get database instance
+            db = await get_db(dbs, db_id, table)
+
+            # Create memory manager with optional model
+            if request.model:
+                try:
+                    model_instance = get_model(request.model)
+                except ValueError as e:
+                    raise HTTPException(status_code=400, detail=str(e))
+                memory_manager = MemoryManager(model=model_instance, db=db)
+            else:
+                # No model specified - use MemoryManager's default
+                memory_manager = MemoryManager(db=db)
+
+            # Get current memories to count tokens before optimization
+            if isinstance(db, AsyncBaseDb):
+                memories_before = await memory_manager.aget_user_memories(user_id=request.user_id)
+            else:
+                memories_before = memory_manager.get_user_memories(user_id=request.user_id)
+
+            if not memories_before:
+                raise HTTPException(status_code=404, detail=f"No memories found for user {request.user_id}")
+
+            # Count tokens before optimization
+            from agno.memory.strategies.summarize import SummarizeStrategy
+
+            strategy = SummarizeStrategy()
+            tokens_before = strategy.count_tokens(memories_before)
+            memories_before_count = len(memories_before)
+
+            # Optimize memories with default SUMMARIZE strategy
+            if isinstance(db, AsyncBaseDb):
+                optimized_memories = await memory_manager.aoptimize_memories(
+                    user_id=request.user_id,
+                    strategy=MemoryOptimizationStrategyType.SUMMARIZE,
+                    apply=request.apply,
+                )
+            else:
+                optimized_memories = memory_manager.optimize_memories(
+                    user_id=request.user_id,
+                    strategy=MemoryOptimizationStrategyType.SUMMARIZE,
+                    apply=request.apply,
+                )
+
+            # Count tokens after optimization
+            tokens_after = strategy.count_tokens(optimized_memories)
+            memories_after_count = len(optimized_memories)
+
+            # Calculate statistics
+            tokens_saved = tokens_before - tokens_after
+            reduction_percentage = (tokens_saved / tokens_before * 100.0) if tokens_before > 0 else 0.0
+
+            # Convert to schema objects
+            optimized_memory_schemas = [
+                UserMemorySchema(
+                    memory_id=mem.memory_id or "",
+                    memory=mem.memory or "",
+                    topics=mem.topics,
+                    agent_id=mem.agent_id,
+                    team_id=mem.team_id,
+                    user_id=mem.user_id,
+                    updated_at=mem.updated_at,
+                )
+                for mem in optimized_memories
+            ]
+
+            return OptimizeMemoriesResponse(
+                memories=optimized_memory_schemas,
+                memories_before=memories_before_count,
+                memories_after=memories_after_count,
+                tokens_before=tokens_before,
+                tokens_after=tokens_after,
+                tokens_saved=tokens_saved,
+                reduction_percentage=reduction_percentage,
+            )
+
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(f"Failed to optimize memories for user {request.user_id}: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Failed to optimize memories: {str(e)}")
+
     return router
 
 
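A possible call against the new route, previewing the optimization without persisting it (`apply=false`). The base URL is illustrative and the route prefix depends on how the memory router is mounted; the path, query parameters, and body fields come from the route definition above:

```python
import httpx

resp = httpx.post(
    "http://localhost:7777/optimize-memories",  # mount prefix may differ
    json={"user_id": "user2", "model": "openai:gpt-4o-mini", "apply": False},
)
stats = resp.json()
print(f"{stats['tokens_before']} -> {stats['tokens_after']} tokens ({stats['reduction_percentage']:.0f}% reduction)")
```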
agno/os/routers/memory/schemas.py
CHANGED

@@ -60,3 +60,29 @@ class UserStatsSchema(BaseModel):
             total_memories=user_stats_dict["total_memories"],
             last_memory_updated_at=datetime.fromtimestamp(updated_at, tz=timezone.utc) if updated_at else None,
         )
+
+
+class OptimizeMemoriesRequest(BaseModel):
+    """Schema for memory optimization request"""
+
+    user_id: str = Field(..., description="User ID to optimize memories for")
+    model: Optional[str] = Field(
+        default=None,
+        description="Model to use for optimization in format 'provider:model_id' (e.g., 'openai:gpt-4o-mini', 'anthropic:claude-3-5-sonnet-20241022', 'google:gemini-2.0-flash-exp'). If not specified, uses MemoryManager's default model (gpt-4o).",
+    )
+    apply: bool = Field(
+        default=True,
+        description="If True, apply optimization changes to database. If False, return preview only without saving.",
+    )
+
+
+class OptimizeMemoriesResponse(BaseModel):
+    """Schema for memory optimization response"""
+
+    memories: List[UserMemorySchema] = Field(..., description="List of optimized memory objects")
+    memories_before: int = Field(..., description="Number of memories before optimization", ge=0)
+    memories_after: int = Field(..., description="Number of memories after optimization", ge=0)
+    tokens_before: int = Field(..., description="Token count before optimization", ge=0)
+    tokens_after: int = Field(..., description="Token count after optimization", ge=0)
+    tokens_saved: int = Field(..., description="Number of tokens saved through optimization", ge=0)
+    reduction_percentage: float = Field(..., description="Percentage of token reduction achieved", ge=0.0, le=100.0)