letta-nightly 0.7.13.dev20250511104036__py3-none-any.whl → 0.7.14.dev20250513020711__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +14 -17
  3. letta/agents/base_agent.py +112 -1
  4. letta/agents/letta_agent.py +35 -55
  5. letta/agents/letta_agent_batch.py +22 -45
  6. letta/agents/voice_agent.py +10 -42
  7. letta/functions/schema_generator.py +7 -3
  8. letta/llm_api/anthropic.py +4 -2
  9. letta/llm_api/openai.py +4 -2
  10. letta/orm/agents_tags.py +5 -2
  11. letta/orm/blocks_agents.py +3 -1
  12. letta/orm/sqlalchemy_base.py +91 -1
  13. letta/schemas/message.py +1 -1
  14. letta/serialize_schemas/marshmallow_agent.py +4 -4
  15. letta/server/db.py +180 -88
  16. letta/server/rest_api/app.py +6 -3
  17. letta/server/rest_api/chat_completions_interface.py +1 -0
  18. letta/server/rest_api/interface.py +54 -16
  19. letta/server/rest_api/routers/v1/sources.py +1 -0
  20. letta/server/server.py +1 -2
  21. letta/services/agent_manager.py +40 -31
  22. letta/services/block_manager.py +61 -34
  23. letta/services/group_manager.py +11 -15
  24. letta/services/identity_manager.py +9 -13
  25. letta/services/job_manager.py +12 -17
  26. letta/services/llm_batch_manager.py +17 -21
  27. letta/services/message_manager.py +53 -31
  28. letta/services/organization_manager.py +7 -14
  29. letta/services/passage_manager.py +6 -10
  30. letta/services/provider_manager.py +5 -9
  31. letta/services/sandbox_config_manager.py +13 -17
  32. letta/services/source_manager.py +13 -17
  33. letta/services/step_manager.py +5 -9
  34. letta/services/tool_manager.py +9 -14
  35. letta/services/user_manager.py +7 -12
  36. letta/settings.py +2 -0
  37. letta/streaming_interface.py +2 -0
  38. letta/utils.py +1 -1
  39. {letta_nightly-0.7.13.dev20250511104036.dist-info → letta_nightly-0.7.14.dev20250513020711.dist-info}/METADATA +2 -1
  40. {letta_nightly-0.7.13.dev20250511104036.dist-info → letta_nightly-0.7.14.dev20250513020711.dist-info}/RECORD +43 -43
  41. {letta_nightly-0.7.13.dev20250511104036.dist-info → letta_nightly-0.7.14.dev20250513020711.dist-info}/LICENSE +0 -0
  42. {letta_nightly-0.7.13.dev20250511104036.dist-info → letta_nightly-0.7.14.dev20250513020711.dist-info}/WHEEL +0 -0
  43. {letta_nightly-0.7.13.dev20250511104036.dist-info → letta_nightly-0.7.14.dev20250513020711.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.7.13"
1
+ __version__ = "0.7.14"
2
2
 
3
3
  # import clients
4
4
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -133,7 +133,6 @@ class Agent(BaseAgent):
133
133
  # Different interfaces can handle events differently
134
134
  # e.g., print in CLI vs send a discord message with a discord bot
135
135
  self.interface = interface
136
- self.chunk_index = 0
137
136
 
138
137
  # Create the persistence manager object based on the AgentState info
139
138
  self.message_manager = MessageManager()
@@ -248,11 +247,9 @@ class Agent(BaseAgent):
248
247
  group_id=group_id,
249
248
  )
250
249
  messages.append(new_message)
251
- self.interface.function_message(f"Error: {error_msg}", msg_obj=new_message, chunk_index=self.chunk_index)
252
- self.chunk_index += 1
250
+ self.interface.function_message(f"Error: {error_msg}", msg_obj=new_message, chunk_index=0)
253
251
  if include_function_failed_message:
254
- self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=new_message, chunk_index=self.chunk_index)
255
- self.chunk_index += 1
252
+ self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=new_message)
256
253
 
257
254
  # Return updated messages
258
255
  return messages
@@ -422,6 +419,7 @@ class Agent(BaseAgent):
422
419
  messages = [] # append these to the history when done
423
420
  function_name = None
424
421
  function_args = {}
422
+ chunk_index = 0
425
423
 
426
424
  # Step 2: check if LLM wanted to call a function
427
425
  if response_message.function_call or (response_message.tool_calls is not None and len(response_message.tool_calls) > 0):
@@ -465,8 +463,8 @@ class Agent(BaseAgent):
465
463
  nonnull_content = False
466
464
  if response_message.content or response_message.reasoning_content or response_message.redacted_reasoning_content:
467
465
  # The content if then internal monologue, not chat
468
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=self.chunk_index)
469
- self.chunk_index += 1
466
+ self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
467
+ chunk_index += 1
470
468
  # Flag to avoid printing a duplicate if inner thoughts get popped from the function call
471
469
  nonnull_content = True
472
470
 
@@ -515,8 +513,8 @@ class Agent(BaseAgent):
515
513
  response_message.content = function_args.pop("inner_thoughts")
516
514
  # The content if then internal monologue, not chat
517
515
  if response_message.content and not nonnull_content:
518
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=self.chunk_index)
519
- self.chunk_index += 1
516
+ self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
517
+ chunk_index += 1
520
518
 
521
519
  # (Still parsing function args)
522
520
  # Handle requests for immediate heartbeat
@@ -542,8 +540,8 @@ class Agent(BaseAgent):
542
540
  # handle cases where we return a json message
543
541
  if "message" in function_args:
544
542
  function_args["message"] = str(function_args.get("message", ""))
545
- self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1], chunk_index=self.chunk_index)
546
- self.chunk_index += 1
543
+ self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
544
+ chunk_index = 0 # reset chunk index after assistant message
547
545
  try:
548
546
  # handle tool execution (sandbox) and state updates
549
547
  log_telemetry(
@@ -667,10 +665,9 @@ class Agent(BaseAgent):
667
665
  group_id=group_id,
668
666
  )
669
667
  ) # extend conversation with function response
670
- self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1], chunk_index=self.chunk_index)
671
- self.chunk_index += 1
672
- self.interface.function_message(f"Success: {function_response_string}", msg_obj=messages[-1], chunk_index=self.chunk_index)
673
- self.chunk_index += 1
668
+ self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
669
+ self.interface.function_message(f"Success: {function_response_string}", msg_obj=messages[-1], chunk_index=chunk_index)
670
+ chunk_index += 1
674
671
  self.last_function_response = function_response
675
672
 
676
673
  else:
@@ -685,8 +682,8 @@ class Agent(BaseAgent):
685
682
  group_id=group_id,
686
683
  )
687
684
  ) # extend conversation with assistant's reply
688
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=self.chunk_index)
689
- self.chunk_index += 1
685
+ self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
686
+ chunk_index += 1
690
687
  heartbeat_request = False
691
688
  function_failed = False
692
689
 
@@ -3,14 +3,21 @@ from typing import Any, AsyncGenerator, List, Optional, Union
3
3
 
4
4
  import openai
5
5
 
6
+ from letta.helpers.datetime_helpers import get_utc_time
7
+ from letta.log import get_logger
8
+ from letta.schemas.agent import AgentState
6
9
  from letta.schemas.enums import MessageStreamStatus
7
10
  from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
8
11
  from letta.schemas.letta_message_content import TextContent
9
12
  from letta.schemas.letta_response import LettaResponse
10
- from letta.schemas.message import MessageCreate
13
+ from letta.schemas.message import Message, MessageCreate, MessageUpdate
11
14
  from letta.schemas.user import User
12
15
  from letta.services.agent_manager import AgentManager
16
+ from letta.services.helpers.agent_manager_helper import compile_system_message
13
17
  from letta.services.message_manager import MessageManager
18
+ from letta.utils import united_diff
19
+
20
+ logger = get_logger(__name__)
14
21
 
15
22
 
16
23
  class BaseAgent(ABC):
@@ -64,3 +71,107 @@ class BaseAgent(ABC):
64
71
  return ""
65
72
 
66
73
  return [{"role": input_message.role.value, "content": get_content(input_message)} for input_message in input_messages]
74
+
75
+ def _rebuild_memory(
76
+ self,
77
+ in_context_messages: List[Message],
78
+ agent_state: AgentState,
79
+ num_messages: int | None = None, # storing these calculations is specific to the voice agent
80
+ num_archival_memories: int | None = None,
81
+ ) -> List[Message]:
82
+ try:
83
+ # Refresh Memory
84
+ # TODO: This only happens for the summary block (voice?)
85
+ # [DB Call] loading blocks (modifies: agent_state.memory.blocks)
86
+ self.agent_manager.refresh_memory(agent_state=agent_state, actor=self.actor)
87
+
88
+ # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
89
+ curr_system_message = in_context_messages[0]
90
+ curr_memory_str = agent_state.memory.compile()
91
+ curr_system_message_text = curr_system_message.content[0].text
92
+ if curr_memory_str in curr_system_message_text:
93
+ # NOTE: could this cause issues if a block is removed? (substring match would still work)
94
+ logger.debug(
95
+ f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
96
+ )
97
+ return in_context_messages
98
+
99
+ memory_edit_timestamp = get_utc_time()
100
+
101
+ # [DB Call] size of messages and archival memories
102
+ num_messages = num_messages or self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
103
+ num_archival_memories = num_archival_memories or self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
104
+
105
+ new_system_message_str = compile_system_message(
106
+ system_prompt=agent_state.system,
107
+ in_context_memory=agent_state.memory,
108
+ in_context_memory_last_edit=memory_edit_timestamp,
109
+ previous_message_count=num_messages,
110
+ archival_memory_size=num_archival_memories,
111
+ )
112
+
113
+ diff = united_diff(curr_system_message_text, new_system_message_str)
114
+ if len(diff) > 0:
115
+ logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
116
+
117
+ # [DB Call] Update Messages
118
+ new_system_message = self.message_manager.update_message_by_id(
119
+ curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
120
+ )
121
+ # Skip pulling down the agent's memory again to save on a db call
122
+ return [new_system_message] + in_context_messages[1:]
123
+
124
+ else:
125
+ return in_context_messages
126
+ except:
127
+ logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
128
+ raise
129
+
130
+ async def _rebuild_memory_async(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
131
+ """
132
+ Async version of function above. For now before breaking up components, changes should be made in both places.
133
+ """
134
+ try:
135
+ # [DB Call] loading blocks (modifies: agent_state.memory.blocks)
136
+ await self.agent_manager.refresh_memory_async(agent_state=agent_state, actor=self.actor)
137
+
138
+ # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
139
+ curr_system_message = in_context_messages[0]
140
+ curr_memory_str = agent_state.memory.compile()
141
+ curr_system_message_text = curr_system_message.content[0].text
142
+ if curr_memory_str in curr_system_message_text:
143
+ logger.debug(
144
+ f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
145
+ )
146
+ return in_context_messages
147
+
148
+ memory_edit_timestamp = get_utc_time()
149
+
150
+ # [DB Call] size of messages and archival memories
151
+ # todo: blocking for now
152
+ num_messages = num_messages or self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
153
+ num_archival_memories = num_archival_memories or self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
154
+
155
+ new_system_message_str = compile_system_message(
156
+ system_prompt=agent_state.system,
157
+ in_context_memory=agent_state.memory,
158
+ in_context_memory_last_edit=memory_edit_timestamp,
159
+ previous_message_count=num_messages,
160
+ archival_memory_size=num_archival_memories,
161
+ )
162
+
163
+ diff = united_diff(curr_system_message_text, new_system_message_str)
164
+ if len(diff) > 0:
165
+ logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
166
+
167
+ # [DB Call] Update Messages
168
+ new_system_message = self.message_manager.update_message_by_id_async(
169
+ curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
170
+ )
171
+ return [new_system_message] + in_context_messages[1:]
172
+
173
+ else:
174
+ return in_context_messages
175
+ except:
176
+ logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
177
+ raise
@@ -9,7 +9,6 @@ from openai.types.chat import ChatCompletion, ChatCompletionChunk
9
9
  from letta.agents.base_agent import BaseAgent
10
10
  from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages
11
11
  from letta.helpers import ToolRulesSolver
12
- from letta.helpers.datetime_helpers import get_utc_time
13
12
  from letta.helpers.tool_execution_helper import enable_strict_mode
14
13
  from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
15
14
  from letta.llm_api.llm_client import LLMClient
@@ -22,18 +21,18 @@ from letta.schemas.enums import MessageRole, MessageStreamStatus
22
21
  from letta.schemas.letta_message import AssistantMessage
23
22
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
24
23
  from letta.schemas.letta_response import LettaResponse
25
- from letta.schemas.message import Message, MessageCreate, MessageUpdate
24
+ from letta.schemas.message import Message, MessageCreate
26
25
  from letta.schemas.openai.chat_completion_response import ToolCall
27
26
  from letta.schemas.user import User
28
27
  from letta.server.rest_api.utils import create_letta_messages_from_llm_response
29
28
  from letta.services.agent_manager import AgentManager
30
29
  from letta.services.block_manager import BlockManager
31
- from letta.services.helpers.agent_manager_helper import compile_system_message
32
30
  from letta.services.message_manager import MessageManager
33
31
  from letta.services.passage_manager import PassageManager
34
32
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
33
+ from letta.settings import settings
34
+ from letta.system import package_function_response
35
35
  from letta.tracing import log_event, trace_method
36
- from letta.utils import united_diff
37
36
 
38
37
  logger = get_logger(__name__)
39
38
 
@@ -59,6 +58,8 @@ class LettaAgent(BaseAgent):
59
58
  self.use_assistant_message = use_assistant_message
60
59
  self.response_messages: List[Message] = []
61
60
 
61
+ self.last_function_response = self._load_last_function_response()
62
+
62
63
  @trace_method
63
64
  async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
64
65
  agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
@@ -168,6 +169,7 @@ class LettaAgent(BaseAgent):
168
169
  yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
169
170
 
170
171
  @trace_method
172
+ # When raising an error this doesn't show up
171
173
  async def _get_ai_reply(
172
174
  self,
173
175
  llm_client: LLMClientBase,
@@ -176,7 +178,13 @@ class LettaAgent(BaseAgent):
176
178
  tool_rules_solver: ToolRulesSolver,
177
179
  stream: bool,
178
180
  ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
179
- in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
181
+ if settings.experimental_enable_async_db_engine:
182
+ in_context_messages = await self._rebuild_memory_async(in_context_messages, agent_state)
183
+ else:
184
+ if settings.experimental_skip_rebuild_memory and agent_state.llm_config.model_endpoint_type == "google_vertex":
185
+ logger.info("Skipping memory rebuild")
186
+ else:
187
+ in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
180
188
 
181
189
  tools = [
182
190
  t
@@ -194,7 +202,12 @@ class LettaAgent(BaseAgent):
194
202
  or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)
195
203
  ]
196
204
 
197
- valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools]))
205
+ # Mirror the sync agent loop: get allowed tools or allow all if none are allowed
206
+ valid_tool_names = tool_rules_solver.get_allowed_tool_names(
207
+ available_tools=set([t.name for t in tools]),
208
+ last_function_response=self.last_function_response,
209
+ ) or list(set(t.name for t in tools))
210
+
198
211
  # TODO: Copied from legacy agent loop, so please be cautious
199
212
  # Set force tool
200
213
  force_tool_call = None
@@ -255,6 +268,7 @@ class LettaAgent(BaseAgent):
255
268
  tool_args=tool_args,
256
269
  agent_state=agent_state,
257
270
  )
271
+ function_response = package_function_response(tool_result, success_flag)
258
272
 
259
273
  # 4. Register tool call with tool rule solver
260
274
  # Resolve whether or not to continue stepping
@@ -283,54 +297,10 @@ class LettaAgent(BaseAgent):
283
297
  pre_computed_tool_message_id=pre_computed_tool_message_id,
284
298
  )
285
299
  persisted_messages = self.message_manager.create_many_messages(tool_call_messages, actor=self.actor)
300
+ self.last_function_response = function_response
286
301
 
287
302
  return persisted_messages, continue_stepping
288
303
 
289
- def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
290
- try:
291
- self.agent_manager.refresh_memory(agent_state=agent_state, actor=self.actor)
292
-
293
- # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
294
- curr_system_message = in_context_messages[0]
295
- curr_memory_str = agent_state.memory.compile()
296
- curr_system_message_text = curr_system_message.content[0].text
297
- if curr_memory_str in curr_system_message_text:
298
- # NOTE: could this cause issues if a block is removed? (substring match would still work)
299
- logger.debug(
300
- f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
301
- )
302
- return in_context_messages
303
-
304
- memory_edit_timestamp = get_utc_time()
305
-
306
- num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
307
- num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
308
-
309
- new_system_message_str = compile_system_message(
310
- system_prompt=agent_state.system,
311
- in_context_memory=agent_state.memory,
312
- in_context_memory_last_edit=memory_edit_timestamp,
313
- previous_message_count=num_messages,
314
- archival_memory_size=num_archival_memories,
315
- )
316
-
317
- diff = united_diff(curr_system_message_text, new_system_message_str)
318
- if len(diff) > 0:
319
- logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
320
-
321
- new_system_message = self.message_manager.update_message_by_id(
322
- curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
323
- )
324
-
325
- # Skip pulling down the agent's memory again to save on a db call
326
- return [new_system_message] + in_context_messages[1:]
327
-
328
- else:
329
- return in_context_messages
330
- except:
331
- logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
332
- raise
333
-
334
304
  @trace_method
335
305
  async def _execute_tool(self, tool_name: str, tool_args: dict, agent_state: AgentState) -> Tuple[str, bool]:
336
306
  """
@@ -348,10 +318,6 @@ class LettaAgent(BaseAgent):
348
318
  results = await self._send_message_to_agents_matching_tags(**tool_args)
349
319
  log_event(name="finish_send_message_to_agents_matching_tags", attributes=tool_args)
350
320
  return json.dumps(results), True
351
- elif target_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
352
- log_event(name=f"start_composio_{tool_name}_execution", attributes=tool_args)
353
- log_event(name=f"finish_compsio_{tool_name}_execution", attributes=tool_args)
354
- return tool_execution_result.func_return, True
355
321
  else:
356
322
  tool_execution_manager = ToolExecutionManager(agent_state=agent_state, actor=self.actor)
357
323
  # TODO: Integrate sandbox result
@@ -416,3 +382,17 @@ class LettaAgent(BaseAgent):
416
382
  tasks = [asyncio.create_task(process_agent(agent_state=agent_state, message=message)) for agent_state in matching_agents]
417
383
  results = await asyncio.gather(*tasks)
418
384
  return results
385
+
386
+ def _load_last_function_response(self):
387
+ """Load the last function response from message history"""
388
+ in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_id, actor=self.actor)
389
+ for msg in reversed(in_context_messages):
390
+ if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
391
+ text_content = msg.content[0].text
392
+ try:
393
+ response_json = json.loads(text_content)
394
+ if response_json.get("message"):
395
+ return response_json["message"]
396
+ except (json.JSONDecodeError, KeyError):
397
+ raise ValueError(f"Invalid JSON format in message: {text_content}")
398
+ return None
@@ -1,11 +1,12 @@
1
1
  import json
2
2
  import uuid
3
3
  from dataclasses import dataclass
4
- from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
4
+ from typing import Any, AsyncGenerator, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
6
  from aiomultiprocess import Pool
7
7
  from anthropic.types.beta.messages import BetaMessageBatchCanceledResult, BetaMessageBatchErroredResult, BetaMessageBatchSucceededResult
8
8
 
9
+ from letta.agents.base_agent import BaseAgent
9
10
  from letta.agents.helpers import _prepare_in_context_messages
10
11
  from letta.helpers import ToolRulesSolver
11
12
  from letta.helpers.datetime_helpers import get_utc_time
@@ -16,20 +17,20 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
16
17
  from letta.log import get_logger
17
18
  from letta.orm.enums import ToolType
18
19
  from letta.schemas.agent import AgentState, AgentStepState
19
- from letta.schemas.enums import AgentStepStatus, JobStatus, ProviderType
20
+ from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
20
21
  from letta.schemas.job import JobUpdate
22
+ from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
21
23
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
22
24
  from letta.schemas.letta_request import LettaBatchRequest
23
- from letta.schemas.letta_response import LettaBatchResponse
25
+ from letta.schemas.letta_response import LettaBatchResponse, LettaResponse
24
26
  from letta.schemas.llm_batch_job import LLMBatchItem
25
- from letta.schemas.message import Message, MessageCreate, MessageUpdate
27
+ from letta.schemas.message import Message, MessageCreate
26
28
  from letta.schemas.openai.chat_completion_response import ToolCall as OpenAIToolCall
27
29
  from letta.schemas.sandbox_config import SandboxConfig, SandboxType
28
30
  from letta.schemas.user import User
29
31
  from letta.server.rest_api.utils import create_heartbeat_system_message, create_letta_messages_from_llm_response
30
32
  from letta.services.agent_manager import AgentManager
31
33
  from letta.services.block_manager import BlockManager
32
- from letta.services.helpers.agent_manager_helper import compile_system_message
33
34
  from letta.services.job_manager import JobManager
34
35
  from letta.services.llm_batch_manager import LLMBatchManager
35
36
  from letta.services.message_manager import MessageManager
@@ -38,7 +39,6 @@ from letta.services.sandbox_config_manager import SandboxConfigManager
38
39
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
39
40
  from letta.settings import tool_settings
40
41
  from letta.tracing import log_event, trace_method
41
- from letta.utils import united_diff
42
42
 
43
43
  logger = get_logger(__name__)
44
44
 
@@ -95,7 +95,7 @@ async def execute_tool_wrapper(params: ToolExecutionParams) -> Tuple[str, Tuple[
95
95
 
96
96
  # TODO: Limitations ->
97
97
  # TODO: Only works with anthropic for now
98
- class LettaAgentBatch:
98
+ class LettaAgentBatch(BaseAgent):
99
99
 
100
100
  def __init__(
101
101
  self,
@@ -539,43 +539,20 @@ class LettaAgentBatch:
539
539
  return in_context_messages
540
540
 
541
541
  # TODO: Make this a bullk function
542
- def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
543
- agent_state = self.agent_manager.refresh_memory(agent_state=agent_state, actor=self.actor)
544
-
545
- # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
546
- curr_system_message = in_context_messages[0]
547
- curr_memory_str = agent_state.memory.compile()
548
- curr_system_message_text = curr_system_message.content[0].text
549
- if curr_memory_str in curr_system_message_text:
550
- # NOTE: could this cause issues if a block is removed? (substring match would still work)
551
- logger.debug(
552
- f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
553
- )
554
- return in_context_messages
555
-
556
- memory_edit_timestamp = get_utc_time()
557
-
558
- num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
559
- num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
560
-
561
- new_system_message_str = compile_system_message(
562
- system_prompt=agent_state.system,
563
- in_context_memory=agent_state.memory,
564
- in_context_memory_last_edit=memory_edit_timestamp,
565
- previous_message_count=num_messages,
566
- archival_memory_size=num_archival_memories,
567
- )
568
-
569
- diff = united_diff(curr_system_message_text, new_system_message_str)
570
- if len(diff) > 0:
571
- logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
572
-
573
- new_system_message = self.message_manager.update_message_by_id(
574
- curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
575
- )
542
+ def _rebuild_memory(
543
+ self,
544
+ in_context_messages: List[Message],
545
+ agent_state: AgentState,
546
+ num_messages: int | None = None,
547
+ num_archival_memories: int | None = None,
548
+ ) -> List[Message]:
549
+ return super()._rebuild_memory(in_context_messages, agent_state)
576
550
 
577
- # Skip pulling down the agent's memory again to save on a db call
578
- return [new_system_message] + in_context_messages[1:]
551
+ # Not used in batch.
552
+ async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
553
+ raise NotImplementedError
579
554
 
580
- else:
581
- return in_context_messages
555
+ async def step_stream(
556
+ self, input_messages: List[MessageCreate], max_steps: int = 10
557
+ ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
558
+ raise NotImplementedError
@@ -22,7 +22,7 @@ from letta.orm.enums import ToolType
22
22
  from letta.schemas.agent import AgentState, AgentType
23
23
  from letta.schemas.enums import MessageRole
24
24
  from letta.schemas.letta_response import LettaResponse
25
- from letta.schemas.message import Message, MessageCreate, MessageUpdate
25
+ from letta.schemas.message import Message, MessageCreate
26
26
  from letta.schemas.openai.chat_completion_request import (
27
27
  AssistantMessage,
28
28
  ChatCompletionRequest,
@@ -47,7 +47,6 @@ from letta.services.passage_manager import PassageManager
47
47
  from letta.services.summarizer.enums import SummarizationMode
48
48
  from letta.services.summarizer.summarizer import Summarizer
49
49
  from letta.settings import model_settings
50
- from letta.utils import united_diff
51
50
 
52
51
  logger = get_logger(__name__)
53
52
 
@@ -293,48 +292,17 @@ class VoiceAgent(BaseAgent):
293
292
  agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
294
293
  )
295
294
 
296
- def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
297
- # Refresh memory
298
- # TODO: This only happens for the summary block
299
- # TODO: We want to extend this refresh to be general, and stick it in agent_manager
300
- block_ids = [block.id for block in agent_state.memory.blocks]
301
- agent_state.memory.blocks = self.block_manager.get_all_blocks_by_ids(block_ids=block_ids, actor=self.actor)
302
-
303
- # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
304
- curr_system_message = in_context_messages[0]
305
- curr_memory_str = agent_state.memory.compile()
306
- curr_system_message_text = curr_system_message.content[0].text
307
- if curr_memory_str in curr_system_message_text:
308
- # NOTE: could this cause issues if a block is removed? (substring match would still work)
309
- logger.debug(
310
- f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
311
- )
312
- return in_context_messages
313
-
314
- memory_edit_timestamp = get_utc_time()
315
-
316
- new_system_message_str = compile_system_message(
317
- system_prompt=agent_state.system,
318
- in_context_memory=agent_state.memory,
319
- in_context_memory_last_edit=memory_edit_timestamp,
320
- previous_message_count=self.num_messages,
321
- archival_memory_size=self.num_archival_memories,
295
+ def _rebuild_memory(
296
+ self,
297
+ in_context_messages: List[Message],
298
+ agent_state: AgentState,
299
+ num_messages: int | None = None,
300
+ num_archival_memories: int | None = None,
301
+ ) -> List[Message]:
302
+ return super()._rebuild_memory(
303
+ in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
322
304
  )
323
305
 
324
- diff = united_diff(curr_system_message_text, new_system_message_str)
325
- if len(diff) > 0:
326
- logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
327
-
328
- new_system_message = self.message_manager.update_message_by_id(
329
- curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
330
- )
331
-
332
- # Skip pulling down the agent's memory again to save on a db call
333
- return [new_system_message] + in_context_messages[1:]
334
-
335
- else:
336
- return in_context_messages
337
-
338
306
  def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
339
307
  tool_schemas = self._build_tool_schemas(agent_state)
340
308
  tool_choice = "auto" if tool_schemas else None
@@ -466,9 +466,13 @@ def generate_tool_schema_for_mcp(
466
466
  name = mcp_tool.name
467
467
  description = mcp_tool.description
468
468
 
469
- assert "type" in parameters_schema
470
- assert "required" in parameters_schema
471
- assert "properties" in parameters_schema
469
+ assert "type" in parameters_schema, parameters_schema
470
+ assert "properties" in parameters_schema, parameters_schema
471
+ # assert "required" in parameters_schema, parameters_schema
472
+
473
+ # Zero-arg tools often omit "required" because nothing is required.
474
+ # Normalise so downstream code can treat it consistently.
475
+ parameters_schema.setdefault("required", [])
472
476
 
473
477
  # Add the optional heartbeat parameter
474
478
  if append_heartbeat:
@@ -997,10 +997,12 @@ def anthropic_chat_completions_process_stream(
997
997
  expect_reasoning_content=extended_thinking,
998
998
  name=name,
999
999
  message_index=message_idx,
1000
+ prev_message_type=prev_message_type,
1000
1001
  )
1001
- if message_type != prev_message_type and message_type is not None:
1002
+ if message_type != prev_message_type and message_type is not None and prev_message_type is not None:
1002
1003
  message_idx += 1
1003
- prev_message_type = message_type
1004
+ if message_type is not None:
1005
+ prev_message_type = message_type
1004
1006
  elif isinstance(stream_interface, AgentRefreshStreamingInterface):
1005
1007
  stream_interface.process_refresh(chat_completion_response)
1006
1008
  else:
letta/llm_api/openai.py CHANGED
@@ -325,10 +325,12 @@ def openai_chat_completions_process_stream(
325
325
  expect_reasoning_content=expect_reasoning_content,
326
326
  name=name,
327
327
  message_index=message_idx,
328
+ prev_message_type=prev_message_type,
328
329
  )
329
- if message_type != prev_message_type and message_type is not None:
330
+ if message_type != prev_message_type and message_type is not None and prev_message_type is not None:
330
331
  message_idx += 1
331
- prev_message_type = message_type
332
+ if message_type is not None:
333
+ prev_message_type = message_type
332
334
  elif isinstance(stream_interface, AgentRefreshStreamingInterface):
333
335
  stream_interface.process_refresh(chat_completion_response)
334
336
  else:
letta/orm/agents_tags.py CHANGED
@@ -1,4 +1,4 @@
1
- from sqlalchemy import ForeignKey, String, UniqueConstraint
1
+ from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
2
2
  from sqlalchemy.orm import Mapped, mapped_column, relationship
3
3
 
4
4
  from letta.orm.base import Base
@@ -6,7 +6,10 @@ from letta.orm.base import Base
6
6
 
7
7
  class AgentsTags(Base):
8
8
  __tablename__ = "agents_tags"
9
- __table_args__ = (UniqueConstraint("agent_id", "tag", name="unique_agent_tag"),)
9
+ __table_args__ = (
10
+ UniqueConstraint("agent_id", "tag", name="unique_agent_tag"),
11
+ Index("ix_agents_tags_agent_id_tag", "agent_id", "tag"),
12
+ )
10
13
 
11
14
  # # agent generates its own id
12
15
  # # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase