letta-nightly 0.7.5.dev20250428110034__py3-none-any.whl → 0.7.6.dev20250429062643__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. letta/__init__.py +1 -1
  2. letta/agents/base_agent.py +1 -1
  3. letta/agents/ephemeral_memory_agent.py +353 -43
  4. letta/agents/voice_agent.py +196 -62
  5. letta/constants.py +2 -0
  6. letta/helpers/datetime_helpers.py +7 -0
  7. letta/interfaces/openai_chat_completions_streaming_interface.py +16 -12
  8. letta/llm_api/google_ai_client.py +4 -0
  9. letta/llm_api/llm_api_tools.py +5 -2
  10. letta/llm_api/openai.py +2 -1
  11. letta/llm_api/openai_client.py +3 -2
  12. letta/schemas/llm_config.py +5 -1
  13. letta/schemas/openai/chat_completion_request.py +1 -0
  14. letta/schemas/providers.py +4 -3
  15. letta/schemas/sandbox_config.py +4 -4
  16. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -10
  17. letta/server/rest_api/routers/v1/voice.py +8 -18
  18. letta/server/rest_api/utils.py +26 -20
  19. letta/server/server.py +67 -26
  20. letta/services/helpers/agent_manager_helper.py +2 -2
  21. letta/services/helpers/tool_execution_helper.py +30 -3
  22. letta/services/summarizer/summarizer.py +121 -54
  23. letta/services/tool_executor/tool_execution_sandbox.py +13 -9
  24. letta/services/tool_sandbox/local_sandbox.py +4 -4
  25. letta/services/user_manager.py +5 -2
  26. letta/settings.py +4 -2
  27. letta/system.py +0 -1
  28. letta/tracing.py +1 -0
  29. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/METADATA +1 -1
  30. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/RECORD +33 -33
  31. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/LICENSE +0 -0
  32. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/WHEEL +0 -0
  33. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.7.5"
+__version__ = "0.7.6"
 
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client
letta/agents/base_agent.py CHANGED
@@ -63,4 +63,4 @@ class BaseAgent(ABC):
             else:
                 return ""
 
-        return [{"role": input_message.role, "content": get_content(input_message)} for input_message in input_messages]
+        return [{"role": input_message.role.value, "content": get_content(input_message)} for input_message in input_messages]
letta/agents/ephemeral_memory_agent.py CHANGED
@@ -1,24 +1,29 @@
-from typing import AsyncGenerator, Dict, List
+import json
+import xml.etree.ElementTree as ET
+from typing import AsyncGenerator, Dict, List, Tuple, Union
 
 import openai
 
 from letta.agents.base_agent import BaseAgent
-from letta.helpers.tool_execution_helper import enable_strict_mode
-from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import MessageRole
+from letta.schemas.block import BlockUpdate
+from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import TextContent
-from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.letta_response import LettaResponse
+from letta.schemas.message import MessageCreate
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, SystemMessage, Tool, UserMessage
+from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
+from letta.server.rest_api.utils import convert_in_context_letta_messages_to_openai, create_input_messages
 from letta.services.agent_manager import AgentManager
+from letta.services.block_manager import BlockManager
 from letta.services.message_manager import MessageManager
 
 
 class EphemeralMemoryAgent(BaseAgent):
     """
     A stateless agent that helps with offline memory computations.
-
     """
 
     def __init__(
@@ -27,6 +32,9 @@ class EphemeralMemoryAgent(BaseAgent):
         openai_client: openai.AsyncClient,
         message_manager: MessageManager,
         agent_manager: AgentManager,
+        block_manager: BlockManager,
+        target_block_label: str,
+        message_transcripts: List[str],
         actor: User,
     ):
         super().__init__(
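
Constructing the agent now requires the three new arguments. A hypothetical wiring sketch; it assumes the parameters preceding `openai_client` (e.g. `agent_id`) are unchanged by this diff and that the manager objects already exist:

```python
import openai

from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent

def build_sleeptime_agent(agent_id, message_manager, agent_manager, block_manager, actor):
    """Hypothetical factory; keyword names mirror the new __init__ signature above."""
    return EphemeralMemoryAgent(
        agent_id=agent_id,                  # assumed unchanged from the elided parameters
        openai_client=openai.AsyncClient(),
        message_manager=message_manager,
        agent_manager=agent_manager,
        block_manager=block_manager,        # new: persists the rewritten block at the end of step()
        target_block_label="human",         # new: which memory block to maintain
        message_transcripts=[],             # new: raw lines that store_memory slices by index
        actor=actor,
    )
```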
@@ -37,48 +45,122 @@ class EphemeralMemoryAgent(BaseAgent):
             actor=actor,
         )
 
-    async def step(self, input_messages: List[MessageCreate]) -> List[Message]:
+        self.block_manager = block_manager
+        self.target_block_label = target_block_label
+        self.message_transcripts = message_transcripts
+
+    def update_message_transcript(self, message_transcripts: List[str]):
+        self.message_transcripts = message_transcripts
+
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         """
-        Synchronous method that takes a user's input text and returns a summary from OpenAI.
-        Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
+        Process the user's input message, allowing the model to call memory-related tools
+        until it decides to stop and provide a final response.
         """
         agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+        in_context_messages = create_input_messages(input_messages=input_messages, agent_id=self.agent_id, actor=self.actor)
+        openai_messages = convert_in_context_letta_messages_to_openai(in_context_messages, exclude_system_messages=True)
 
-        openai_messages = self.pre_process_input_message(input_messages=input_messages)
-        request = self._build_openai_request(openai_messages, agent_state)
+        # 1. Store memories
+        request = self._build_openai_request(
+            openai_messages, agent_state, tools=self._build_store_memory_tool_schemas(), system=self._get_memory_store_system_prompt()
+        )
 
         chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+        assistant_message = chat_completion.choices[0].message
 
-        return [
-            Message(
-                role=MessageRole.assistant,
-                content=[TextContent(text=chat_completion.choices[0].message.content.strip())],
-            )
-        ]
+        # Process tool calls
+        tool_call = assistant_message.tool_calls[0]
+        function_name = tool_call.function.name
+        function_args = json.loads(tool_call.function.arguments)
 
-    def pre_process_input_message(self, input_messages: List[MessageCreate]) -> List[Dict]:
-        input_message = input_messages[0]
-        input_prompt_augmented = f"""
-        You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
-        Your response will directly populate a "memory block" called "human" that describes the user, that will be used to answer more questions in the future.
-        You should err on the side of being more verbose, and also try to *predict* the trajectory of the conversation, and pull memories or messages you think will be relevant to where the conversation is going.
+        if function_name == "store_memory":
+            print("Called store_memory")
+            print(function_args)
+            for chunk_args in function_args.get("chunks"):
+                self.store_memory(agent_state=agent_state, **chunk_args)
+            result = "Successfully stored memories"
+        else:
+            raise ValueError("Error: Unknown tool function '{function_name}'")
 
-        Your response should include:
-        - A high level summary of the relevant events/timeline of the conversation relevant to the query
-        - Direct citations of quotes from the messages you used while creating the summary
+        openai_messages.append(
+            {
+                "role": "assistant",
+                "content": assistant_message.content,
+                "tool_calls": [
+                    {
+                        "id": tool_call.id,
+                        "type": "function",
+                        "function": {"name": function_name, "arguments": tool_call.function.arguments},
+                    }
+                ],
+            }
+        )
+        openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})
 
-        Here is a history of the messages so far:
+        # 2. Execute rethink block memory loop
+        human_block_content = self.agent_manager.get_block_with_label(
+            agent_id=self.agent_id, block_label=self.target_block_label, actor=self.actor
+        )
+        rethink_command = f"""
+        Here is the current memory block created earlier:
 
-        {self._format_messages_llm_friendly()}
+        ### CURRENT MEMORY
+        {human_block_content}
+        ### END CURRENT MEMORY
 
-        This is the query:
+        Please refine this block:
 
-        "{input_message.content}"
+        - Merge in any new facts and remove outdated or contradictory details.
+        - Organize related information together (e.g., preferences, background, ongoing goals).
+        - Add any light, supportable inferences that deepen understanding—but do not invent unsupported details.
 
-        Your response:
+        Use `rethink_memory(new_memory)` as many times as you need to iteratively improve the text. When it’s fully polished and complete, call `finish_rethinking_memory()`.
         """
+        rethink_command = UserMessage(content=rethink_command)
+        openai_messages.append(rethink_command.model_dump())
+
+        for _ in range(max_steps):
+            request = self._build_openai_request(
+                openai_messages, agent_state, tools=self._build_sleeptime_tools(), system=self._get_rethink_memory_system_prompt()
+            )
+            chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+            assistant_message = chat_completion.choices[0].message
+
+            # Process tool calls
+            tool_call = assistant_message.tool_calls[0]
+            function_name = tool_call.function.name
+            function_args = json.loads(tool_call.function.arguments)
+
+            if function_name == "rethink_memory":
+                print("Called rethink_memory")
+                print(function_args)
+                result = self.rethink_memory(agent_state=agent_state, **function_args)
+            elif function_name == "finish_rethinking_memory":
+                print("Called finish_rethinking_memory")
+                break
+            else:
+                result = f"Error: Unknown tool function '{function_name}'"
+            openai_messages.append(
+                {
+                    "role": "assistant",
+                    "content": assistant_message.content,
+                    "tool_calls": [
+                        {
+                            "id": tool_call.id,
+                            "type": "function",
+                            "function": {"name": function_name, "arguments": tool_call.function.arguments},
+                        }
+                    ],
+                }
+            )
+            openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})
+
+        # Actually save the memory:
+        target_block = agent_state.memory.get_block(self.target_block_label)
+        self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=target_block.value), actor=self.actor)
 
-        return [{"role": "user", "content": input_prompt_augmented}]
+        return LettaResponse(messages=[], usage=LettaUsageStatistics())
 
     def _format_messages_llm_friendly(self):
         messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
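
The reworked `step()` is now a two-phase tool loop: one forced `store_memory` call to archive evicted transcript chunks, then up to `max_steps` rounds of `rethink_memory` until the model calls `finish_rethinking_memory`. A minimal sketch of how a caller might drive it (the driver function and the `MessageCreate` field shapes are assumptions, not part of this diff):

```python
from letta.schemas.message import MessageCreate

async def run_offline_memory_pass(agent, transcript: list[str]):
    """Hypothetical driver for the two-phase step() above; not part of this diff."""
    agent.update_message_transcript(transcript)  # raw lines store_memory slices by index
    # Phase 1 (store_memory) and phase 2 (the rethink loop) both run inside step().
    response = await agent.step([MessageCreate(role="user", content="Update the memory block.")])
    return response  # LettaResponse(messages=[], usage=LettaUsageStatistics()) in this version
```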
@@ -86,12 +168,15 @@ class EphemeralMemoryAgent(BaseAgent):
         llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
         return "\n".join(llm_friendly_messages)
 
-    def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
+    def _build_openai_request(
+        self, openai_messages: List[Dict], agent_state: AgentState, tools: List[Tool], system: str
+    ) -> ChatCompletionRequest:
+        system_message = SystemMessage(role="system", content=system)
         openai_request = ChatCompletionRequest(
-            model=agent_state.llm_config.model,
-            messages=openai_messages,
-            # tools=self._build_tool_schemas(agent_state),
-            # tool_choice="auto",
+            model="gpt-4o",  # agent_state.llm_config.model, # TODO: Separate config for summarizer?
+            messages=[system_message] + openai_messages,
+            tools=tools,
+            tool_choice="required",
             user=self.actor.id,
             max_completion_tokens=agent_state.llm_config.max_tokens,
             temperature=agent_state.llm_config.temperature,
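
`tool_choice="required"` is what lets `step()` assume `assistant_message.tool_calls[0]` exists: the model must answer with a tool call rather than plain text. A standalone sketch of the same pattern against the OpenAI SDK directly (tool schema abbreviated; the message contents are illustrative only):

```python
from openai import OpenAI

client = OpenAI()
resp = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You maintain a memory block."},
        {"role": "user", "content": "Remember that I prefer short answers."},
    ],
    tools=[{
        "type": "function",
        "function": {
            "name": "rethink_memory",
            "parameters": {
                "type": "object",
                "properties": {"new_memory": {"type": "string"}},
                "required": ["new_memory"],
            },
        },
    }],
    tool_choice="required",  # the model must emit a tool call; a bare text reply is not allowed
)
assert resp.choices[0].message.tool_calls  # guaranteed non-empty under "required"
```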
@@ -99,14 +184,239 @@ class EphemeralMemoryAgent(BaseAgent):
         )
         return openai_request
 
-    def _build_tool_schemas(self, agent_state: AgentState) -> List[Tool]:
-        # Only include memory tools
-        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+    def _build_store_memory_tool_schemas(self) -> List[Tool]:
+        """
+        Build the schemas for the three memory-related tools.
+        """
+        tools = [
+            Tool(
+                type="function",
+                function={
+                    "name": "store_memory",
+                    "description": "Archive coherent chunks of dialogue that will be evicted, preserving raw lines and a brief contextual description.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "chunks": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "start_index": {"type": "integer", "description": "Index of first line in original history."},
+                                        "end_index": {"type": "integer", "description": "Index of last line in original history."},
+                                        "context": {
+                                            "type": "string",
+                                            "description": "A high-level description providing context for why this chunk matters.",
+                                        },
+                                    },
+                                    "required": ["start_index", "end_index", "context"],
+                                },
+                            }
+                        },
+                        "required": ["chunks"],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+        ]
+
+        return tools
+
+    def _build_sleeptime_tools(self) -> List[Tool]:
+        tools = [
+            Tool(
+                type="function",
+                function={
+                    "name": "rethink_memory",
+                    "description": (
+                        "Rewrite memory block for the main agent, new_memory should contain all current "
+                        "information from the block that is not outdated or inconsistent, integrating any "
+                        "new information, resulting in a new memory block that is organized, readable, and "
+                        "comprehensive."
+                    ),
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "new_memory": {
+                                "type": "string",
+                                "description": (
+                                    "The new memory with information integrated from the memory block. "
+                                    "If there is no new information, then this should be the same as the "
+                                    "content in the source block."
+                                ),
+                            },
+                        },
+                        "required": ["new_memory"],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+            Tool(
+                type="function",
+                function={
+                    "name": "finish_rethinking_memory",
+                    "description": ("This function is called when the agent is done rethinking the memory."),
+                    "parameters": {
+                        "type": "object",
+                        "properties": {},
+                        "required": [],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+        ]
+
+        return tools
 
-    return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
+    def rethink_memory(self, new_memory: str, agent_state: AgentState) -> str:
+        if agent_state.memory.get_block(self.target_block_label) is None:
+            agent_state.memory.create_block(label=self.target_block_label, value=new_memory)
 
-    async def step_stream(self, input_messages: List[MessageCreate]) -> AsyncGenerator[str, None]:
+        agent_state.memory.update_block_value(label=self.target_block_label, value=new_memory)
+        return "Successfully updated memory"
+
+    def store_memory(self, start_index: int, end_index: int, context: str, agent_state: AgentState) -> str:
+        """
+        Store a memory.
+        """
+        try:
+            messages = self.message_transcripts[start_index : end_index + 1]
+            memory = self.serialize(messages, context)
+            self.agent_manager.passage_manager.insert_passage(
+                agent_state=agent_state,
+                agent_id=agent_state.id,
+                text=memory,
+                actor=self.actor,
+            )
+            self.agent_manager.rebuild_system_prompt(agent_id=agent_state.id, actor=self.actor, force=True)
+
+            return "Sucessfully stored memory"
+        except Exception as e:
+            return f"Failed to store memory given start_index {start_index} and end_index {end_index}: {e}"
+
+    def serialize(self, messages: List[str], context: str) -> str:
+        """
+        Produce an XML document like:
+
+        <memory>
+            <messages>
+                <message>…</message>
+                <message>…</message>
+
+            </messages>
+            <context>…</context>
+        </memory>
+        """
+        root = ET.Element("memory")
+
+        msgs_el = ET.SubElement(root, "messages")
+        for msg in messages:
+            m = ET.SubElement(msgs_el, "message")
+            m.text = msg
+
+        sum_el = ET.SubElement(root, "context")
+        sum_el.text = context
+
+        # ET.tostring will escape reserved chars for you
+        return ET.tostring(root, encoding="unicode")
+
+    def deserialize(self, xml_str: str) -> Tuple[List[str], str]:
+        """
+        Parse the XML back into (messages, context). Raises ValueError if tags are missing.
+        """
+        try:
+            root = ET.fromstring(xml_str)
+        except ET.ParseError as e:
+            raise ValueError(f"Invalid XML: {e}")
+
+        msgs_el = root.find("messages")
+        if msgs_el is None:
+            raise ValueError("Missing <messages> section")
+
+        messages = []
+        for m in msgs_el.findall("message"):
+            # .text may be None if empty, so coerce to empty string
+            messages.append(m.text or "")
+
+        sum_el = root.find("context")
+        if sum_el is None:
+            raise ValueError("Missing <context> section")
+        context = sum_el.text or ""
+
+        return messages, context
+
+    async def step_stream(
+        self, input_messages: List[MessageCreate], max_steps: int = 10
+    ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         """
         This agent is synchronous-only. If called in an async context, raise an error.
         """
         raise NotImplementedError("EphemeralMemoryAgent does not support async step.")
+
+    # TODO: Move these to independent text files
+    def _get_memory_store_system_prompt(self) -> str:
+        return """
+        You are a memory-recall assistant working asynchronously alongside a main chat agent that retains only a portion of the message history in its context window.
+
+        When given a full transcript with lines marked (Older) or (Newer), you should:
+        1. Segment the (Older) portion into coherent chunks by topic, instruction, or preference.
+        2. For each chunk, produce only:
+           - start_index: the first line’s index
+           - end_index: the last line’s index
+           - context: a blurb explaining why this chunk matters
+
+        Return exactly one JSON tool call to `store_memory`, consider this miniature example:
+
+        ---
+
+        (Older)
+        0. user: Okay. Got it. Keep your answers shorter, please.
+        1. assistant: Sure thing! I’ll keep it brief. What would you like to know?
+        2. user: I like basketball.
+        3. assistant: That's great! Do you have a favorite team or player?
+
+        (Newer)
+        4. user: Yeah. I like basketball.
+        5. assistant: Awesome! What do you enjoy most about basketball?
+
+        ---
+
+        Example output:
+
+        ```json
+        {
+            "name": "store_memory",
+            "arguments": {
+                "chunks": [
+                    {
+                        "start_index": 0,
+                        "end_index": 1,
+                        "context": "User explicitly asked the assistant to keep responses concise."
+                    },
+                    {
+                        "start_index": 2,
+                        "end_index": 3,
+                        "context": "User enjoys basketball and prompted follow-up about their favorite team or player."
+                    }
+                ]
+            }
+        }
+        ```
+        """
+
+    def _get_rethink_memory_system_prompt(self) -> str:
+        return """
+        SYSTEM
+        You are a Memory-Updater agent. Your job is to iteratively refine the given memory block until it’s concise, organized, and complete.
+
+        Instructions:
+        - Call `rethink_memory(new_memory: string)` as many times as you like. Each call should submit a fully revised version of the block so far.
+        - When you’re fully satisfied, call `finish_rethinking_memory()`.
+        - Don’t output anything else—only the JSON for these tool calls.
+
+        Goals:
+        - Merge in new facts and remove contradictions.
+        - Group related details (preferences, biography, goals).
+        - Draw light, supportable inferences without inventing facts.
+        - Preserve every critical piece of information.
+        """