letta-nightly 0.6.43.dev20250320104204__py3-none-any.whl → 0.6.43.dev20250322104133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +2 -2
- letta/agents/ephemeral_memory_agent.py +114 -0
- letta/agents/{low_latency_agent.py → voice_agent.py} +133 -79
- letta/client/client.py +1 -1
- letta/embeddings.py +3 -14
- letta/functions/function_sets/multi_agent.py +46 -1
- letta/functions/helpers.py +10 -57
- letta/functions/mcp_client/base_client.py +7 -9
- letta/functions/mcp_client/exceptions.py +6 -0
- letta/helpers/tool_execution_helper.py +9 -7
- letta/llm_api/anthropic.py +1 -19
- letta/llm_api/aws_bedrock.py +2 -2
- letta/llm_api/azure_openai.py +22 -46
- letta/llm_api/llm_api_tools.py +15 -4
- letta/orm/sqlalchemy_base.py +106 -7
- letta/schemas/openai/chat_completion_request.py +20 -1
- letta/schemas/providers.py +251 -0
- letta/schemas/tool.py +4 -1
- letta/server/rest_api/app.py +1 -11
- letta/server/rest_api/optimistic_json_parser.py +5 -5
- letta/server/rest_api/routers/v1/tools.py +34 -2
- letta/server/rest_api/routers/v1/voice.py +5 -5
- letta/server/server.py +6 -0
- letta/services/agent_manager.py +1 -1
- letta/services/block_manager.py +8 -6
- letta/services/message_manager.py +65 -2
- letta/settings.py +3 -3
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/METADATA +4 -4
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/RECORD +32 -30
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
@@ -522,7 +522,7 @@ class Agent(BaseAgent):
                         openai_message_dict=response_message.model_dump(),
                     )
                 )  # extend conversation with assistant's reply
-                self.logger.
+                self.logger.debug(f"Function call message: {messages[-1]}")
 
                 nonnull_content = False
                 if response_message.content:
@@ -537,7 +537,7 @@ class Agent(BaseAgent):
                     response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
                 )
                 function_name = function_call.name
-                self.logger.
+                self.logger.debug(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
 
                 # Failure case 1: function name is wrong (not in agent_state.tools)
                 target_letta_tool = None
letta/agents/ephemeral_memory_agent.py
ADDED
@@ -0,0 +1,114 @@
+from typing import AsyncGenerator, Dict, List
+
+import openai
+
+from letta.agents.base_agent import BaseAgent
+from letta.helpers.tool_execution_helper import enable_strict_mode
+from letta.orm.enums import ToolType
+from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import UserMessage
+from letta.schemas.letta_message_content import TextContent
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.user import User
+from letta.services.agent_manager import AgentManager
+from letta.services.message_manager import MessageManager
+
+
+class EphemeralMemoryAgent(BaseAgent):
+    """
+    A stateless agent that helps with offline memory computations.
+
+    """
+
+    def __init__(
+        self,
+        agent_id: str,
+        openai_client: openai.AsyncClient,
+        message_manager: MessageManager,
+        agent_manager: AgentManager,
+        actor: User,
+    ):
+        super().__init__(
+            agent_id=agent_id,
+            openai_client=openai_client,
+            message_manager=message_manager,
+            agent_manager=agent_manager,
+            actor=actor,
+        )
+
+    async def step(self, input_message: UserMessage) -> List[Message]:
+        """
+        Synchronous method that takes a user's input text and returns a summary from OpenAI.
+        Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
+        """
+        agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+
+        input_message = self.pre_process_input_message(input_message=input_message)
+        request = self._build_openai_request([input_message], agent_state)
+
+        chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+
+        return [
+            Message(
+                role=MessageRole.assistant,
+                content=[TextContent(text=chat_completion.choices[0].message.content.strip())],
+            )
+        ]
+
+    def pre_process_input_message(self, input_message: UserMessage) -> Dict:
+        input_prompt_augmented = f"""
+        You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
+        Your response will directly populate a "memory block" called "human" that describes the user, that will be used to answer more questions in the future.
+        You should err on the side of being more verbose, and also try to *predict* the trajectory of the conversation, and pull memories or messages you think will be relevant to where the conversation is going.
+
+        Your response should include:
+        - A high level summary of the relevant events/timeline of the conversation relevant to the query
+        - Direct citations of quotes from the messages you used while creating the summary
+
+        Here is a history of the messages so far:
+
+        {self._format_messages_llm_friendly()}
+
+        This is the query:
+
+        "{input_message.content}"
+
+        Your response:
+        """
+
+        input_message.content = input_prompt_augmented
+        # print(input_prompt_augmented)
+        return input_message.model_dump()
+
+    def _format_messages_llm_friendly(self):
+        messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
+
+        llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
+        return "\n".join(llm_friendly_messages)
+
+    def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
+        openai_request = ChatCompletionRequest(
+            model=agent_state.llm_config.model,
+            messages=openai_messages,
+            # tools=self._build_tool_schemas(agent_state),
+            # tool_choice="auto",
+            user=self.actor.id,
+            max_completion_tokens=agent_state.llm_config.max_tokens,
+            temperature=agent_state.llm_config.temperature,
+            stream=False,
+        )
+        return openai_request
+
+    def _build_tool_schemas(self, agent_state: AgentState) -> List[Tool]:
+        # Only include memory tools
+        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+
+        return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
+
+    async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
+        """
+        This agent is synchronous-only. If called in an async context, raise an error.
+        """
+        raise NotImplementedError("EphemeralMemoryAgent does not support async step.")
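For orientation, here is a minimal sketch of how the new EphemeralMemoryAgent could be driven on its own. Only the constructor and `step()` signatures come from the diff above; the no-argument manager constructors, the `actor` object, and the query string are assumptions for illustration.

```python
# Hypothetical wiring based on the diff above; in letta-nightly the client,
# managers, and actor are normally supplied by the running server.
import asyncio

import openai

from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
from letta.schemas.letta_message import UserMessage
from letta.services.agent_manager import AgentManager
from letta.services.message_manager import MessageManager


async def recall(agent_id: str, actor) -> str:
    memory_agent = EphemeralMemoryAgent(
        agent_id=agent_id,
        openai_client=openai.AsyncClient(),  # assumes OPENAI_API_KEY is set
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        actor=actor,
    )
    # step() augments the query with the agent's message history and returns
    # a single assistant Message whose text is the recall summary.
    results = await memory_agent.step(UserMessage(content="What does the user do for work?"))
    return results[0].content[0].text


# asyncio.run(recall("agent-...", actor=my_user))  # my_user: a letta User record
```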
letta/agents/{low_latency_agent.py → voice_agent.py}
RENAMED
@@ -5,7 +5,7 @@ from typing import Any, AsyncGenerator, Dict, List, Tuple
 import openai
 
 from letta.agents.base_agent import BaseAgent
-from letta.agents.
+from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import (
@@ -42,13 +42,12 @@ from letta.services.helpers.agent_manager_helper import compile_system_message
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.summarizer.enums import SummarizationMode
-from letta.services.summarizer.summarizer import Summarizer
 from letta.utils import united_diff
 
 logger = get_logger(__name__)
 
 
-class
+class VoiceAgent(BaseAgent):
     """
     A function-calling loop for streaming OpenAI responses with tool execution.
     This agent:
@@ -65,9 +64,9 @@ class LowLatencyAgent(BaseAgent):
         agent_manager: AgentManager,
         block_manager: BlockManager,
         actor: User,
+        message_buffer_limit: int,
+        message_buffer_min: int,
         summarization_mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER,
-        message_buffer_limit: int = 10,
-        message_buffer_min: int = 4,
     ):
         super().__init__(
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
@@ -79,75 +78,78 @@ class LowLatencyAgent(BaseAgent):
         self.passage_manager = PassageManager()  # TODO: pass this in
         # TODO: This is not guaranteed to exist!
         self.summary_block_label = "human"
-        self.summarizer = Summarizer(
-            mode=summarization_mode,
-            summarizer_agent=EphemeralAgent(
-                agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
-            ),
-            message_buffer_limit=message_buffer_limit,
-            message_buffer_min=message_buffer_min,
-        )
+        # self.summarizer = Summarizer(
+        #     mode=summarization_mode,
+        #     summarizer_agent=EphemeralAgent(
+        #         agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        #     ),
+        #     message_buffer_limit=message_buffer_limit,
+        #     message_buffer_min=message_buffer_min,
+        # )
         self.message_buffer_limit = message_buffer_limit
-        self.message_buffer_min = message_buffer_min
+        # self.message_buffer_min = message_buffer_min
+        self.offline_memory_agent = EphemeralMemoryAgent(
+            agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        )
 
     async def step(self, input_message: UserMessage) -> List[Message]:
         raise NotImplementedError("LowLatencyAgent does not have a synchronous step implemented currently.")
 
     async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
         """
-
-
+        Main streaming loop that yields partial tokens.
+        Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        input_message = self.pre_process_input_message(input_message
-        agent_state = self.agent_manager.get_agent_by_id(
+        input_message = self.pre_process_input_message(input_message)
+        agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         in_context_messages = self.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=self.actor)
         letta_message_db_queue = [create_user_message(input_message=input_message, agent_id=agent_state.id, actor=self.actor)]
         in_memory_message_history = [input_message]
 
+        # TODO: Define max steps here
         while True:
-            #
-            in_context_messages = self._rebuild_memory(in_context_messages
-
-            # Convert Letta messages to OpenAI messages
+            # Rebuild memory each loop
+            in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
             openai_messages = convert_letta_messages_to_openai(in_context_messages)
             openai_messages.extend(in_memory_message_history)
+
             request = self._build_openai_request(openai_messages, agent_state)
 
-            # Execute the request
             stream = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
             streaming_interface = OpenAIChatCompletionsStreamingInterface(stream_pre_execution_message=True)
 
-
-
+            # 1) Yield partial tokens from OpenAI
+            async for sse_chunk in streaming_interface.process(stream):
+                yield sse_chunk
 
-            #
-
-                streaming_interface,
+            # 2) Now handle the final AI response. This might yield more text (stalling, etc.)
+            should_continue = await self._handle_ai_response(
+                streaming_interface,
+                agent_state,
+                in_memory_message_history,
+                letta_message_db_queue,
             )
 
-            if not
+            if not should_continue:
                 break
 
-        # Rebuild context window
+        # Rebuild context window if desired
        await self._rebuild_context_window(in_context_messages, letta_message_db_queue, agent_state)
-
         yield "data: [DONE]\n\n"
 
     async def _handle_ai_response(
         self,
-        streaming_interface: OpenAIChatCompletionsStreamingInterface,
+        streaming_interface: "OpenAIChatCompletionsStreamingInterface",
         agent_state: AgentState,
         in_memory_message_history: List[Dict[str, Any]],
         letta_message_db_queue: List[Any],
     ) -> bool:
         """
-
-
-
-        Returns:
-            bool: True if execution should continue, False if the step loop should terminate.
+        Now that streaming is done, handle the final AI response.
+        This might yield additional SSE tokens if we do stalling.
+        At the end, set self._continue_execution accordingly.
         """
-        #
+        # 1. If we have any leftover content from partial stream, store it as an assistant message
         if streaming_interface.content_buffer:
             content = "".join(streaming_interface.content_buffer)
             in_memory_message_history.append({"role": "assistant", "content": content})
@@ -160,82 +162,92 @@ class LowLatencyAgent(BaseAgent):
             )
             letta_message_db_queue.extend(assistant_msgs)
 
-        #
+        # 2. If a tool call was requested, handle it
         if streaming_interface.tool_call_happened:
+            tool_call_name = streaming_interface.tool_call_name
+            tool_call_args_str = streaming_interface.tool_call_args_str or "{}"
             try:
-                tool_args = json.loads(
+                tool_args = json.loads(tool_call_args_str)
             except json.JSONDecodeError:
                 tool_args = {}
 
             tool_call_id = streaming_interface.tool_call_id or f"call_{uuid.uuid4().hex[:8]}"
-
             assistant_tool_call_msg = AssistantMessage(
                 content=None,
                 tool_calls=[
                     ToolCall(
                         id=tool_call_id,
                         function=ToolCallFunction(
-                            name=
-                            arguments=
+                            name=tool_call_name,
+                            arguments=tool_call_args_str,
                         ),
                     )
                 ],
             )
             in_memory_message_history.append(assistant_tool_call_msg.model_dump())
 
-            tool_result,
-                tool_name=
+            tool_result, success_flag = await self._execute_tool(
+                tool_name=tool_call_name,
                 tool_args=tool_args,
                 agent_state=agent_state,
             )
 
-
+            # 3. Provide function_call response back into the conversation
+            tool_message = ToolMessage(
+                content=json.dumps({"result": tool_result}),
+                tool_call_id=tool_call_id,
+            )
             in_memory_message_history.append(tool_message.model_dump())
 
+            # 4. Insert heartbeat message for follow-up
             heartbeat_user_message = UserMessage(
                 content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user."
             )
             in_memory_message_history.append(heartbeat_user_message.model_dump())
 
+            # 5. Also store in DB
             tool_call_messages = create_tool_call_messages_from_openai_response(
                 agent_id=agent_state.id,
                 model=agent_state.llm_config.model,
-                function_name=
+                function_name=tool_call_name,
                 function_arguments=tool_args,
                 tool_call_id=tool_call_id,
-                function_call_success=
+                function_call_success=success_flag,
                 function_response=tool_result,
                 actor=self.actor,
                 add_heartbeat_request_system_message=True,
             )
             letta_message_db_queue.extend(tool_call_messages)
 
-            #
+            # Because we have new data, we want to continue the while-loop in `step_stream`
             return True
-
-
-
+        else:
+            # If we got here, there's no tool call. If finish_reason_stop => done
+            return not streaming_interface.finish_reason_stop
 
     async def _rebuild_context_window(
         self, in_context_messages: List[Message], letta_message_db_queue: List[Message], agent_state: AgentState
     ) -> None:
         new_letta_messages = self.message_manager.create_many_messages(letta_message_db_queue, actor=self.actor)
+        new_in_context_messages = in_context_messages + new_letta_messages
 
-
-
-
-        new_in_context_messages, summary_str, updated = await self.summarizer.summarize(
-            in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, previous_summary=previous_summary
-        )
-
-        if updated:
-            self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=summary_str), actor=self.actor)
+        if len(new_in_context_messages) > self.message_buffer_limit:
+            cutoff = len(new_in_context_messages) - self.message_buffer_limit
+            new_in_context_messages = [new_in_context_messages[0]] + new_in_context_messages[cutoff:]
 
         self.agent_manager.set_in_context_messages(
             agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
         )
 
     def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
+        # Refresh memory
+        # TODO: This only happens for the summary block
+        # TODO: We want to extend this refresh to be general, and stick it in agent_manager
+        for i, b in enumerate(agent_state.memory.blocks):
+            if b.label == self.summary_block_label:
+                agent_state.memory.blocks[i] = self.block_manager.get_block_by_id(block_id=b.id, actor=self.actor)
+                break
+
         # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
         curr_system_message = in_context_messages[0]
         curr_memory_str = agent_state.memory.compile()
@@ -249,8 +261,8 @@ class LowLatencyAgent(BaseAgent):
 
         memory_edit_timestamp = get_utc_time()
 
-        num_messages = self.message_manager.size(actor=actor, agent_id=
-        num_archival_memories = self.passage_manager.size(actor=actor, agent_id=
+        num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
+        num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
 
         new_system_message_str = compile_system_message(
             system_prompt=agent_state.system,
@@ -296,8 +308,37 @@ class LowLatencyAgent(BaseAgent):
         else:
             tools = agent_state.tools
 
+        # Special tool state
+        recall_memory_utterance_description = (
+            "A lengthier message to be uttered while your memories of the current conversation are being re-contextualized."
+            "You should stall naturally and show the user you're thinking hard. The main thing is to not leave the user in silence."
+            "You MUST also include punctuation at the end of this message."
+        )
+        recall_memory_json = Tool(
+            type="function",
+            function=enable_strict_mode(
+                add_pre_execution_message(
+                    {
+                        "name": "recall_memory",
+                        "description": "Retrieve relevant information from memory based on a given query. Use when you don't remember the answer to a question.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "A description of what the model is trying to recall from memory.",
+                                }
+                            },
+                            "required": ["query"],
+                        },
+                    },
+                    description=recall_memory_utterance_description,
+                )
+            ),
+        )
+
         # TODO: Customize whether or not to have heartbeats, pre_exec_message, etc.
-        return [
+        return [recall_memory_json] + [
             Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
             for t in tools
         ]
@@ -306,19 +347,32 @@ class LowLatencyAgent(BaseAgent):
         """
         Executes a tool and returns (result, success_flag).
         """
-
-        if
-
+        # Special memory case
+        if tool_name == "recall_memory":
+            # TODO: Make this safe
+            await self._recall_memory(tool_args["query"], agent_state)
+            return f"Successfully recalled memory and populated {self.summary_block_label} block.", True
+        else:
+            target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+            if not target_tool:
+                return f"Tool not found: {tool_name}", False
 
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                tool_result, _ = execute_external_tool(
+                    agent_state=agent_state,
+                    function_name=tool_name,
+                    function_args=tool_args,
+                    target_letta_tool=target_tool,
+                    actor=self.actor,
+                    allow_agent_state_modifications=False,
+                )
+                return tool_result, True
+            except Exception as e:
+                return f"Failed to call tool. Error: {e}", False
+
+    async def _recall_memory(self, query, agent_state: AgentState) -> None:
+        results = await self.offline_memory_agent.step(UserMessage(content=query))
+        target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
+        self.block_manager.update_block(
+            block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
+        )
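The rename from LowLatencyAgent to VoiceAgent also swaps the Summarizer-based context management for a static message buffer: `_rebuild_context_window` now keeps the first (system) message plus the most recent `message_buffer_limit` entries. A standalone sketch of that cutoff policy, using generic names rather than letta types:

```python
from typing import List, TypeVar

T = TypeVar("T")


def truncate_buffer(messages: List[T], buffer_limit: int) -> List[T]:
    """Mirror of the cutoff added to _rebuild_context_window above: keep the
    first (system) message and the last `buffer_limit` entries."""
    if len(messages) <= buffer_limit:
        return messages
    cutoff = len(messages) - buffer_limit
    return [messages[0]] + messages[cutoff:]


# truncate_buffer(["sys", "m1", "m2", "m3", "m4"], buffer_limit=2)
# -> ["sys", "m3", "m4"]
```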
letta/client/client.py
CHANGED
@@ -2937,7 +2937,6 @@ class LocalClient(AbstractClient):
 
         Args:
             func (callable): The function to create a tool for.
-            name: (str): Name of the tool (must be unique per-user.)
             tags (Optional[List[str]], optional): Tags for the tool. Defaults to None.
             description (str, optional): The description.
             return_char_limit (int): The character limit for the tool's return value. Defaults to FUNCTION_RETURN_CHAR_LIMIT.
@@ -2950,6 +2949,7 @@ class LocalClient(AbstractClient):
         # parse source code/schema
         source_code = parse_source_code(func)
         source_type = "python"
+        name = func.__name__  # Initialize name using function's __name__
         if not tags:
             tags = []
 
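With this change `create_tool` no longer documents a caller-supplied `name`; the tool name is always taken from the function's `__name__`. A hedged sketch of the resulting behavior (the `LocalClient()` construction and the `tags` value are illustrative, not from the diff):

```python
import random

from letta.client.client import LocalClient


def roll_d20() -> int:
    """Roll a 20-sided die and return the result."""
    return random.randint(1, 20)


client = LocalClient()
tool = client.create_tool(roll_d20, tags=["dice"])
assert tool.name == "roll_d20"  # name is now derived from func.__name__
```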
letta/embeddings.py
CHANGED
@@ -235,7 +235,9 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
 
     if endpoint_type == "openai":
         return OpenAIEmbeddings(
-            api_key=model_settings.openai_api_key,
+            api_key=model_settings.openai_api_key,
+            model=config.embedding_model,
+            base_url=model_settings.openai_api_base,
         )
 
     elif endpoint_type == "azure":
@@ -246,19 +248,6 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
                 model_settings.azure_api_version is not None,
             ]
         )
-        # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-        ## https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-        # model = "text-embedding-ada-002"
-        # deployment = credentials.azure_embedding_deployment if credentials.azure_embedding_deployment is not None else model
-        # return AzureOpenAIEmbedding(
-        #     model=model,
-        #     deployment_name=deployment,
-        #     api_key=credentials.azure_key,
-        #     azure_endpoint=credentials.azure_endpoint,
-        #     api_version=credentials.azure_version,
-        # )
-
         return AzureOpenAIEmbedding(
             api_endpoint=model_settings.azure_base_url,
             api_key=model_settings.azure_api_key,
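The OpenAI branch of `embedding_model` now forwards the configured embedding model and the `openai_api_base` setting, which matters when pointing letta at an OpenAI-compatible endpoint. A hedged sketch of exercising that path; the `EmbeddingConfig` field names other than `embedding_model` and the embedder's method name are assumptions, not taken from the diff:

```python
from letta.embeddings import embedding_model
from letta.schemas.embedding_config import EmbeddingConfig

# Assumed field names for illustration; only embedding_model and the
# openai_api_key / openai_api_base settings are visible in the diff above.
config = EmbeddingConfig(
    embedding_endpoint_type="openai",
    embedding_model="text-embedding-3-small",
    embedding_dim=1536,
    embedding_chunk_size=300,
)

embedder = embedding_model(config)  # returns OpenAIEmbeddings wired with model + base_url
vector = embedder.get_text_embedding("hello world")
```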
letta/functions/function_sets/multi_agent.py
CHANGED
@@ -9,6 +9,8 @@ from letta.functions.helpers import (
 )
 from letta.schemas.enums import MessageRole
 from letta.schemas.message import MessageCreate
+from letta.server.rest_api.utils import get_letta_server
+from letta.utils import log_telemetry
 
 if TYPE_CHECKING:
     from letta.agent import Agent
@@ -85,8 +87,51 @@ def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all:
     response corresponds to a single agent. Agents that do not respond will not have an entry
     in the returned list.
     """
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    server = get_letta_server()
+
+    augmented_message = (
+        f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, "
+        f"make sure to use the 'send_message' at the end, and the system will notify the sender of your response] "
+        f"{message}"
+    )
 
-
+    # Retrieve up to 100 matching agents
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    matching_agents = server.agent_manager.list_agents_matching_tags(actor=self.user, match_all=match_all, match_some=match_some)
+
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents finish",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+
+    # Create a system message
+    messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
+
+    result = asyncio.run(_send_message_to_agents_matching_tags_async(self, server, messages, matching_agents))
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async finish",
+        messages=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    return result
 
 
 def send_message_to_all_agents_in_group(self: "Agent", message: str) -> List[str]:
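The rewritten `send_message_to_agents_matching_tags` resolves the running server with `get_letta_server()`, logs each phase via `log_telemetry`, and fans the message out to agents matched by tag. A hedged sketch of the tag semantics from the tool's point of view; the tag values and the calling agent are made up, and the parameter types are assumed to be lists of strings:

```python
# Hypothetical invocation from inside an agent's tool call; `agent` stands in
# for the bound `self: "Agent"` argument.
replies = send_message_to_agents_matching_tags(
    agent,
    message="Daily stand-up: please post your current status.",
    match_all=["team:support"],                      # every listed tag must be present
    match_some=["shift:morning", "shift:evening"],   # at least one of these must match
)

# Each entry corresponds to one matched agent that responded via send_message.
for reply in replies:
    print(reply)
```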