letta-nightly 0.6.43.dev20250319104146__py3-none-any.whl → 0.6.43.dev20250321104124__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (32)
  1. letta/agent.py +2 -2
  2. letta/agents/ephemeral_memory_agent.py +114 -0
  3. letta/agents/{low_latency_agent.py → voice_agent.py} +133 -79
  4. letta/client/client.py +1 -1
  5. letta/embeddings.py +3 -14
  6. letta/functions/function_sets/multi_agent.py +46 -1
  7. letta/functions/helpers.py +10 -57
  8. letta/functions/mcp_client/base_client.py +7 -9
  9. letta/functions/mcp_client/exceptions.py +6 -0
  10. letta/helpers/tool_execution_helper.py +9 -7
  11. letta/llm_api/anthropic.py +1 -19
  12. letta/llm_api/aws_bedrock.py +2 -2
  13. letta/llm_api/azure_openai.py +22 -46
  14. letta/llm_api/llm_api_tools.py +15 -4
  15. letta/orm/sqlalchemy_base.py +106 -7
  16. letta/schemas/openai/chat_completion_request.py +20 -1
  17. letta/schemas/providers.py +251 -0
  18. letta/schemas/tool.py +4 -1
  19. letta/server/rest_api/app.py +1 -11
  20. letta/server/rest_api/optimistic_json_parser.py +5 -5
  21. letta/server/rest_api/routers/v1/tools.py +34 -2
  22. letta/server/rest_api/routers/v1/voice.py +5 -5
  23. letta/server/server.py +6 -0
  24. letta/services/agent_manager.py +1 -1
  25. letta/services/block_manager.py +8 -6
  26. letta/services/message_manager.py +65 -2
  27. letta/settings.py +3 -3
  28. {letta_nightly-0.6.43.dev20250319104146.dist-info → letta_nightly-0.6.43.dev20250321104124.dist-info}/METADATA +4 -4
  29. {letta_nightly-0.6.43.dev20250319104146.dist-info → letta_nightly-0.6.43.dev20250321104124.dist-info}/RECORD +32 -30
  30. {letta_nightly-0.6.43.dev20250319104146.dist-info → letta_nightly-0.6.43.dev20250321104124.dist-info}/LICENSE +0 -0
  31. {letta_nightly-0.6.43.dev20250319104146.dist-info → letta_nightly-0.6.43.dev20250321104124.dist-info}/WHEEL +0 -0
  32. {letta_nightly-0.6.43.dev20250319104146.dist-info → letta_nightly-0.6.43.dev20250321104124.dist-info}/entry_points.txt +0 -0
letta/agent.py CHANGED
@@ -522,7 +522,7 @@ class Agent(BaseAgent):
                         openai_message_dict=response_message.model_dump(),
                     )
                 )  # extend conversation with assistant's reply
-                self.logger.info(f"Function call message: {messages[-1]}")
+                self.logger.debug(f"Function call message: {messages[-1]}")

                 nonnull_content = False
                 if response_message.content:
@@ -537,7 +537,7 @@ class Agent(BaseAgent):
                     response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
                 )
                 function_name = function_call.name
-                self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
+                self.logger.debug(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")

                 # Failure case 1: function name is wrong (not in agent_state.tools)
                 target_letta_tool = None
letta/agents/ephemeral_memory_agent.py ADDED
@@ -0,0 +1,114 @@
+from typing import AsyncGenerator, Dict, List
+
+import openai
+
+from letta.agents.base_agent import BaseAgent
+from letta.helpers.tool_execution_helper import enable_strict_mode
+from letta.orm.enums import ToolType
+from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import UserMessage
+from letta.schemas.letta_message_content import TextContent
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.user import User
+from letta.services.agent_manager import AgentManager
+from letta.services.message_manager import MessageManager
+
+
+class EphemeralMemoryAgent(BaseAgent):
+    """
+    A stateless agent that helps with offline memory computations.
+    """
+
+    def __init__(
+        self,
+        agent_id: str,
+        openai_client: openai.AsyncClient,
+        message_manager: MessageManager,
+        agent_manager: AgentManager,
+        actor: User,
+    ):
+        super().__init__(
+            agent_id=agent_id,
+            openai_client=openai_client,
+            message_manager=message_manager,
+            agent_manager=agent_manager,
+            actor=actor,
+        )
+
+    async def step(self, input_message: UserMessage) -> List[Message]:
+        """
+        Takes a user's input text and returns a summary from OpenAI.
+        Returns a list of ephemeral Message objects containing the assistant summary.
+        """
+        agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+
+        input_message = self.pre_process_input_message(input_message=input_message)
+        request = self._build_openai_request([input_message], agent_state)
+
+        chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+
+        return [
+            Message(
+                role=MessageRole.assistant,
+                content=[TextContent(text=chat_completion.choices[0].message.content.strip())],
+            )
+        ]
+
+    def pre_process_input_message(self, input_message: UserMessage) -> Dict:
+        input_prompt_augmented = f"""
+        You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
+        Your response will directly populate a "memory block" called "human" that describes the user, and it will be used to answer more questions in the future.
+        You should err on the side of being more verbose, and also try to *predict* the trajectory of the conversation and pull memories or messages you think will be relevant to where the conversation is going.
+
+        Your response should include:
+        - A high-level summary of the events/timeline of the conversation relevant to the query
+        - Direct citations of quotes from the messages you used while creating the summary
+
+        Here is a history of the messages so far:
+
+        {self._format_messages_llm_friendly()}
+
+        This is the query:
+
+        "{input_message.content}"
+
+        Your response:
+        """
+
+        input_message.content = input_prompt_augmented
+        # print(input_prompt_augmented)
+        return input_message.model_dump()
+
+    def _format_messages_llm_friendly(self):
+        messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
+
+        llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
+        return "\n".join(llm_friendly_messages)
+
+    def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
+        openai_request = ChatCompletionRequest(
+            model=agent_state.llm_config.model,
+            messages=openai_messages,
+            # tools=self._build_tool_schemas(agent_state),
+            # tool_choice="auto",
+            user=self.actor.id,
+            max_completion_tokens=agent_state.llm_config.max_tokens,
+            temperature=agent_state.llm_config.temperature,
+            stream=False,
+        )
+        return openai_request
+
+    def _build_tool_schemas(self, agent_state: AgentState) -> List[Tool]:
+        # Only include memory tools
+        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+
+        return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
+
+    async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
+        """
+        Streaming is not supported by this agent.
+        """
+        raise NotImplementedError("EphemeralMemoryAgent does not support async step.")
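For orientation, here is a minimal driver sketch for the new EphemeralMemoryAgent, assuming an existing agent ID and a letta User for the actor; the agent ID and actor below are placeholders, not part of this release:

import asyncio

import openai

from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
from letta.schemas.letta_message import UserMessage
from letta.services.agent_manager import AgentManager
from letta.services.message_manager import MessageManager


async def main(actor):  # actor: a letta User obtained from your own server/session (placeholder)
    agent = EphemeralMemoryAgent(
        agent_id="agent-123",  # hypothetical existing agent ID
        openai_client=openai.AsyncClient(),
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        actor=actor,
    )
    # step() wraps the query in the memory-recall prompt shown above and
    # returns a single assistant Message destined for the "human" block.
    messages = await agent.step(UserMessage(content="What does the user do for work?"))
    print(messages[0].content[0].text)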
letta/agents/{low_latency_agent.py → voice_agent.py} RENAMED
@@ -5,7 +5,7 @@ from typing import Any, AsyncGenerator, Dict, List, Tuple
 import openai

 from letta.agents.base_agent import BaseAgent
-from letta.agents.ephemeral_agent import EphemeralAgent
+from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import (
@@ -42,13 +42,12 @@ from letta.services.helpers.agent_manager_helper import compile_system_message
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.summarizer.enums import SummarizationMode
-from letta.services.summarizer.summarizer import Summarizer
 from letta.utils import united_diff

 logger = get_logger(__name__)


-class LowLatencyAgent(BaseAgent):
+class VoiceAgent(BaseAgent):
     """
     A function-calling loop for streaming OpenAI responses with tool execution.
     This agent:
@@ -65,9 +64,9 @@ class LowLatencyAgent(BaseAgent):
         agent_manager: AgentManager,
         block_manager: BlockManager,
         actor: User,
+        message_buffer_limit: int,
+        message_buffer_min: int,
         summarization_mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER,
-        message_buffer_limit: int = 10,
-        message_buffer_min: int = 4,
     ):
         super().__init__(
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
@@ -79,75 +78,78 @@ class LowLatencyAgent(BaseAgent):
         self.passage_manager = PassageManager()  # TODO: pass this in
         # TODO: This is not guaranteed to exist!
         self.summary_block_label = "human"
-        self.summarizer = Summarizer(
-            mode=summarization_mode,
-            summarizer_agent=EphemeralAgent(
-                agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
-            ),
-            message_buffer_limit=message_buffer_limit,
-            message_buffer_min=message_buffer_min,
-        )
+        # self.summarizer = Summarizer(
+        #     mode=summarization_mode,
+        #     summarizer_agent=EphemeralAgent(
+        #         agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        #     ),
+        #     message_buffer_limit=message_buffer_limit,
+        #     message_buffer_min=message_buffer_min,
+        # )
         self.message_buffer_limit = message_buffer_limit
-        self.message_buffer_min = message_buffer_min
+        # self.message_buffer_min = message_buffer_min
+        self.offline_memory_agent = EphemeralMemoryAgent(
+            agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        )

     async def step(self, input_message: UserMessage) -> List[Message]:
         raise NotImplementedError("LowLatencyAgent does not have a synchronous step implemented currently.")

     async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
         """
-        Async generator that yields partial tokens as SSE events, handles tool calls,
-        and streams error messages if OpenAI API failures occur.
+        Main streaming loop that yields partial tokens.
+        Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        input_message = self.pre_process_input_message(input_message=input_message)
-        agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+        input_message = self.pre_process_input_message(input_message)
+        agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         in_context_messages = self.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=self.actor)
         letta_message_db_queue = [create_user_message(input_message=input_message, agent_id=agent_state.id, actor=self.actor)]
         in_memory_message_history = [input_message]

+        # TODO: Define max steps here
         while True:
-            # Constantly pull down and integrate memory blocks
-            in_context_messages = self._rebuild_memory(in_context_messages=in_context_messages, agent_state=agent_state)
-
-            # Convert Letta messages to OpenAI messages
+            # Rebuild memory each loop
+            in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
             openai_messages = convert_letta_messages_to_openai(in_context_messages)
             openai_messages.extend(in_memory_message_history)
+
             request = self._build_openai_request(openai_messages, agent_state)

-            # Execute the request
             stream = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
             streaming_interface = OpenAIChatCompletionsStreamingInterface(stream_pre_execution_message=True)

-            async for sse in streaming_interface.process(stream):
-                yield sse
+            # 1) Yield partial tokens from OpenAI
+            async for sse_chunk in streaming_interface.process(stream):
+                yield sse_chunk

-            # Process the AI response (buffered messages, tool execution, etc.)
-            continue_execution = await self._handle_ai_response(
-                streaming_interface, agent_state, in_memory_message_history, letta_message_db_queue
+            # 2) Now handle the final AI response. This might yield more text (stalling, etc.)
+            should_continue = await self._handle_ai_response(
+                streaming_interface,
+                agent_state,
+                in_memory_message_history,
+                letta_message_db_queue,
             )

-            if not continue_execution:
+            if not should_continue:
                 break

-        # Rebuild context window
+        # Rebuild context window if desired
         await self._rebuild_context_window(in_context_messages, letta_message_db_queue, agent_state)
-
         yield "data: [DONE]\n\n"

     async def _handle_ai_response(
         self,
-        streaming_interface: OpenAIChatCompletionsStreamingInterface,
+        streaming_interface: "OpenAIChatCompletionsStreamingInterface",
         agent_state: AgentState,
         in_memory_message_history: List[Dict[str, Any]],
         letta_message_db_queue: List[Any],
     ) -> bool:
         """
-        Handles AI response processing, including buffering messages, detecting tool calls,
-        executing tools, and deciding whether to continue execution.
-
-        Returns:
-            bool: True if execution should continue, False if the step loop should terminate.
+        Now that streaming is done, handle the final AI response.
+        This might yield additional SSE tokens if we do stalling.
+        At the end, set self._continue_execution accordingly.
         """
-        # Handle assistant message buffering
+        # 1. If we have any leftover content from partial stream, store it as an assistant message
         if streaming_interface.content_buffer:
             content = "".join(streaming_interface.content_buffer)
             in_memory_message_history.append({"role": "assistant", "content": content})
@@ -160,82 +162,92 @@ class LowLatencyAgent(BaseAgent):
             )
             letta_message_db_queue.extend(assistant_msgs)

-        # Handle tool execution if a tool call occurred
+        # 2. If a tool call was requested, handle it
         if streaming_interface.tool_call_happened:
+            tool_call_name = streaming_interface.tool_call_name
+            tool_call_args_str = streaming_interface.tool_call_args_str or "{}"
             try:
-                tool_args = json.loads(streaming_interface.tool_call_args_str)
+                tool_args = json.loads(tool_call_args_str)
             except json.JSONDecodeError:
                 tool_args = {}

             tool_call_id = streaming_interface.tool_call_id or f"call_{uuid.uuid4().hex[:8]}"
-
             assistant_tool_call_msg = AssistantMessage(
                 content=None,
                 tool_calls=[
                     ToolCall(
                         id=tool_call_id,
                         function=ToolCallFunction(
-                            name=streaming_interface.tool_call_name,
-                            arguments=streaming_interface.tool_call_args_str,
+                            name=tool_call_name,
+                            arguments=tool_call_args_str,
                         ),
                     )
                 ],
             )
             in_memory_message_history.append(assistant_tool_call_msg.model_dump())

-            tool_result, function_call_success = await self._execute_tool(
-                tool_name=streaming_interface.tool_call_name,
+            tool_result, success_flag = await self._execute_tool(
+                tool_name=tool_call_name,
                 tool_args=tool_args,
                 agent_state=agent_state,
             )

-            tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
+            # 3. Provide function_call response back into the conversation
+            tool_message = ToolMessage(
+                content=json.dumps({"result": tool_result}),
+                tool_call_id=tool_call_id,
+            )
             in_memory_message_history.append(tool_message.model_dump())

+            # 4. Insert heartbeat message for follow-up
             heartbeat_user_message = UserMessage(
                 content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user."
             )
             in_memory_message_history.append(heartbeat_user_message.model_dump())

+            # 5. Also store in DB
             tool_call_messages = create_tool_call_messages_from_openai_response(
                 agent_id=agent_state.id,
                 model=agent_state.llm_config.model,
-                function_name=streaming_interface.tool_call_name,
+                function_name=tool_call_name,
                 function_arguments=tool_args,
                 tool_call_id=tool_call_id,
-                function_call_success=function_call_success,
+                function_call_success=success_flag,
                 function_response=tool_result,
                 actor=self.actor,
                 add_heartbeat_request_system_message=True,
             )
             letta_message_db_queue.extend(tool_call_messages)

-            # Continue execution by restarting the loop with updated context
+            # Because we have new data, we want to continue the while-loop in `step_stream`
             return True
-
-        # Exit the loop if finish_reason_stop or no tool call occurred
-        return not streaming_interface.finish_reason_stop
+        else:
+            # If we got here, there's no tool call. If finish_reason_stop => done
+            return not streaming_interface.finish_reason_stop

     async def _rebuild_context_window(
         self, in_context_messages: List[Message], letta_message_db_queue: List[Message], agent_state: AgentState
     ) -> None:
         new_letta_messages = self.message_manager.create_many_messages(letta_message_db_queue, actor=self.actor)
+        new_in_context_messages = in_context_messages + new_letta_messages

-        # TODO: Make this more general and configurable, less brittle
-        target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
-        previous_summary = self.block_manager.get_block_by_id(block_id=target_block.id, actor=self.actor).value
-        new_in_context_messages, summary_str, updated = await self.summarizer.summarize(
-            in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, previous_summary=previous_summary
-        )
-
-        if updated:
-            self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=summary_str), actor=self.actor)
+        if len(new_in_context_messages) > self.message_buffer_limit:
+            cutoff = len(new_in_context_messages) - self.message_buffer_limit
+            new_in_context_messages = [new_in_context_messages[0]] + new_in_context_messages[cutoff:]

         self.agent_manager.set_in_context_messages(
             agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
         )

     def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
+        # Refresh memory
+        # TODO: This only happens for the summary block
+        # TODO: We want to extend this refresh to be general, and stick it in agent_manager
+        for i, b in enumerate(agent_state.memory.blocks):
+            if b.label == self.summary_block_label:
+                agent_state.memory.blocks[i] = self.block_manager.get_block_by_id(block_id=b.id, actor=self.actor)
+                break
+
         # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
         curr_system_message = in_context_messages[0]
         curr_memory_str = agent_state.memory.compile()
@@ -249,8 +261,8 @@ class LowLatencyAgent(BaseAgent):

         memory_edit_timestamp = get_utc_time()

-        num_messages = self.message_manager.size(actor=actor, agent_id=agent_id)
-        num_archival_memories = self.passage_manager.size(actor=actor, agent_id=agent_id)
+        num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
+        num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)

         new_system_message_str = compile_system_message(
             system_prompt=agent_state.system,
@@ -296,8 +308,37 @@ class LowLatencyAgent(BaseAgent):
         else:
             tools = agent_state.tools

+        # Special tool state
+        recall_memory_utterance_description = (
+            "A lengthier message to be uttered while your memories of the current conversation are being re-contextualized."
+            "You should stall naturally and show the user you're thinking hard. The main thing is to not leave the user in silence."
+            "You MUST also include punctuation at the end of this message."
+        )
+        recall_memory_json = Tool(
+            type="function",
+            function=enable_strict_mode(
+                add_pre_execution_message(
+                    {
+                        "name": "recall_memory",
+                        "description": "Retrieve relevant information from memory based on a given query. Use when you don't remember the answer to a question.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "A description of what the model is trying to recall from memory.",
+                                }
+                            },
+                            "required": ["query"],
+                        },
+                    },
+                    description=recall_memory_utterance_description,
+                )
+            ),
+        )
+
         # TODO: Customize whether or not to have heartbeats, pre_exec_message, etc.
-        return [
+        return [recall_memory_json] + [
             Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
             for t in tools
         ]
@@ -306,19 +347,32 @@ class LowLatencyAgent(BaseAgent):
         """
        Executes a tool and returns (result, success_flag).
         """
-        target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
-        if not target_tool:
-            return f"Tool not found: {tool_name}", False
+        # Special memory case
+        if tool_name == "recall_memory":
+            # TODO: Make this safe
+            await self._recall_memory(tool_args["query"], agent_state)
+            return f"Successfully recalled memory and populated {self.summary_block_label} block.", True
+        else:
+            target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+            if not target_tool:
+                return f"Tool not found: {tool_name}", False

-        try:
-            tool_result, _ = execute_external_tool(
-                agent_state=agent_state,
-                function_name=tool_name,
-                function_args=tool_args,
-                target_letta_tool=target_tool,
-                actor=self.actor,
-                allow_agent_state_modifications=False,
-            )
-            return tool_result, True
-        except Exception as e:
-            return f"Failed to call tool. Error: {e}", False
+            try:
+                tool_result, _ = execute_external_tool(
+                    agent_state=agent_state,
+                    function_name=tool_name,
+                    function_args=tool_args,
+                    target_letta_tool=target_tool,
+                    actor=self.actor,
+                    allow_agent_state_modifications=False,
+                )
+                return tool_result, True
+            except Exception as e:
+                return f"Failed to call tool. Error: {e}", False
+
+    async def _recall_memory(self, query, agent_state: AgentState) -> None:
+        results = await self.offline_memory_agent.step(UserMessage(content=query))
+        target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
+        self.block_manager.update_block(
+            block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
+        )
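With the Summarizer call commented out, context management in _rebuild_context_window reduces to a static buffer rule: keep the first (system) message plus the most recent message_buffer_limit messages. A self-contained sketch of just that rule, simplified to strings for illustration:

from typing import List


def trim_to_buffer(messages: List[str], buffer_limit: int) -> List[str]:
    # Keep the first (system) message plus the trailing `buffer_limit` messages.
    if len(messages) > buffer_limit:
        cutoff = len(messages) - buffer_limit
        return [messages[0]] + messages[cutoff:]
    return messages


history = ["system", "u1", "a1", "u2", "a2", "u3"]
print(trim_to_buffer(history, 3))  # ['system', 'u2', 'a2', 'u3']

Note that the rule retains up to buffer_limit + 1 entries, since the system message is kept in addition to the tail.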
letta/client/client.py CHANGED
@@ -2937,7 +2937,6 @@ class LocalClient(AbstractClient):

         Args:
             func (callable): The function to create a tool for.
-            name: (str): Name of the tool (must be unique per-user.)
             tags (Optional[List[str]], optional): Tags for the tool. Defaults to None.
             description (str, optional): The description.
             return_char_limit (int): The character limit for the tool's return value. Defaults to FUNCTION_RETURN_CHAR_LIMIT.
@@ -2950,6 +2949,7 @@ class LocalClient(AbstractClient):
         # parse source code/schema
         source_code = parse_source_code(func)
         source_type = "python"
+        name = func.__name__  # Initialize name using function's __name__
         if not tags:
             tags = []
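The client.py change means create_tool no longer takes a caller-supplied name; the tool name is always derived from the function object. A quick illustration of that derivation (the function below is a hypothetical example, not part of the release):

def roll_d20() -> int:
    """Roll a 20-sided die."""
    import random

    return random.randint(1, 20)


# Inside LocalClient.create_tool, the tool name is now always the function's __name__:
name = roll_d20.__name__
print(name)  # "roll_d20"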
letta/embeddings.py CHANGED
@@ -235,7 +235,9 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None

     if endpoint_type == "openai":
         return OpenAIEmbeddings(
-            api_key=model_settings.openai_api_key, model=config.embedding_model, base_url=model_settings.openai_api_base
+            api_key=model_settings.openai_api_key,
+            model=config.embedding_model,
+            base_url=model_settings.openai_api_base,
         )

     elif endpoint_type == "azure":
@@ -246,19 +248,6 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
                 model_settings.azure_api_version is not None,
             ]
         )
-        # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-        ## https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-        # model = "text-embedding-ada-002"
-        # deployment = credentials.azure_embedding_deployment if credentials.azure_embedding_deployment is not None else model
-        # return AzureOpenAIEmbedding(
-        #     model=model,
-        #     deployment_name=deployment,
-        #     api_key=credentials.azure_key,
-        #     azure_endpoint=credentials.azure_endpoint,
-        #     api_version=credentials.azure_version,
-        # )
-
         return AzureOpenAIEmbedding(
             api_endpoint=model_settings.azure_base_url,
             api_key=model_settings.azure_api_key,
letta/functions/function_sets/multi_agent.py CHANGED
@@ -9,6 +9,8 @@ from letta.functions.helpers import (
 )
 from letta.schemas.enums import MessageRole
 from letta.schemas.message import MessageCreate
+from letta.server.rest_api.utils import get_letta_server
+from letta.utils import log_telemetry

 if TYPE_CHECKING:
     from letta.agent import Agent
@@ -85,8 +87,51 @@ def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all:
     response corresponds to a single agent. Agents that do not respond will not have an entry
     in the returned list.
     """
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    server = get_letta_server()
+
+    augmented_message = (
+        f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, "
+        f"make sure to use the 'send_message' at the end, and the system will notify the sender of your response] "
+        f"{message}"
+    )

-    return asyncio.run(_send_message_to_agents_matching_tags_async(self, message, match_all, match_some))
+    # Retrieve up to 100 matching agents
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    matching_agents = server.agent_manager.list_agents_matching_tags(actor=self.user, match_all=match_all, match_some=match_some)
+
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents finish",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+
+    # Create a system message
+    messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
+
+    result = asyncio.run(_send_message_to_agents_matching_tags_async(self, server, messages, matching_agents))
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async finish",
+        messages=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    return result


 def send_message_to_all_agents_in_group(self: "Agent", message: str) -> List[str]:
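In outline, the refactored tool now performs agent matching and message construction synchronously before entering the async helper. A condensed, non-authoritative sketch of the new control flow, with telemetry elided; all names come from the module shown in the diff above, so this snippet assumes that module's imports:

import asyncio


def send_to_matching_tags(self, message, match_all, match_some):
    # Condensed from send_message_to_agents_matching_tags after this change.
    server = get_letta_server()
    augmented_message = f"[Incoming message from agent with ID '{self.agent_state.id}' ...] {message}"
    # Agent matching now happens here rather than inside the async helper.
    matching_agents = server.agent_manager.list_agents_matching_tags(
        actor=self.user, match_all=match_all, match_some=match_some
    )
    # One system-role message carries the augmented sender preamble.
    messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
    # The helper's signature changed: it now receives the server, the pre-built
    # messages, and the matched agents instead of the raw message and tag filters.
    return asyncio.run(_send_message_to_agents_matching_tags_async(self, server, messages, matching_agents))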