letta-nightly 0.6.50.dev20250411104155__py3-none-any.whl → 0.6.52.dev20250412051016__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +23 -32
- letta/agents/base_agent.py +17 -6
- letta/agents/ephemeral_agent.py +5 -6
- letta/agents/ephemeral_memory_agent.py +8 -10
- letta/agents/helpers.py +6 -6
- letta/agents/letta_agent.py +9 -10
- letta/agents/letta_agent_batch.py +164 -0
- letta/agents/voice_agent.py +8 -8
- letta/functions/function_sets/base.py +1 -1
- letta/helpers/converters.py +5 -2
- letta/helpers/tool_rule_solver.py +12 -2
- letta/jobs/scheduler.py +13 -11
- letta/llm_api/anthropic.py +0 -1
- letta/llm_api/anthropic_client.py +61 -23
- letta/llm_api/cohere.py +1 -1
- letta/llm_api/google_ai_client.py +48 -13
- letta/llm_api/google_vertex_client.py +19 -1
- letta/llm_api/llm_client_base.py +13 -5
- letta/llm_api/openai.py +4 -3
- letta/llm_api/openai_client.py +18 -10
- letta/orm/organization.py +4 -2
- letta/orm/sqlalchemy_base.py +3 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +30 -1
- letta/schemas/identity.py +10 -0
- letta/schemas/letta_request.py +4 -0
- letta/schemas/letta_response.py +9 -1
- letta/schemas/llm_config.py +10 -0
- letta/schemas/message.py +21 -12
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/tool_rule.py +14 -1
- letta/server/rest_api/interface.py +5 -4
- letta/server/rest_api/routers/v1/agents.py +20 -13
- letta/server/rest_api/routers/v1/groups.py +1 -1
- letta/server/rest_api/routers/v1/identities.py +23 -2
- letta/server/rest_api/utils.py +20 -22
- letta/server/server.py +34 -21
- letta/services/agent_manager.py +13 -9
- letta/services/block_manager.py +2 -4
- letta/services/identity_manager.py +21 -5
- letta/services/llm_batch_manager.py +21 -1
- letta/services/summarizer/summarizer.py +11 -4
- letta/services/tool_manager.py +1 -1
- letta/settings.py +1 -0
- letta/utils.py +2 -2
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/METADATA +3 -3
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/RECORD +51 -50
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -110,19 +110,19 @@ class Agent(BaseAgent):
         self.user = user
 
         # initialize a tool rules solver
-        if agent_state.tool_rules:
-            # if there are tool rules, print out a warning
-            for rule in agent_state.tool_rules:
-                if not isinstance(rule, TerminalToolRule):
-                    warnings.warn("Tool rules only work reliably for the latest OpenAI models that support structured outputs.")
-                    break
-
         self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
 
         # gpt-4, gpt-3.5-turbo, ...
         self.model = self.agent_state.llm_config.model
         self.supports_structured_output = check_supports_structured_output(model=self.model, tool_rules=agent_state.tool_rules)
 
+        # if there are tool rules, print out a warning
+        if not self.supports_structured_output and agent_state.tool_rules:
+            for rule in agent_state.tool_rules:
+                if not isinstance(rule, TerminalToolRule):
+                    warnings.warn("Tool rules only work reliably for model backends that support structured outputs (e.g. OpenAI gpt-4o).")
+                    break
+
         # state managers
         self.block_manager = BlockManager()
 
@@ -236,17 +236,15 @@ class Agent(BaseAgent):
 
             # Extend conversation with function response
             function_response = package_function_response(False, error_msg)
-            new_message = Message.dict_to_message(
+            new_message = Message(
                 agent_id=self.agent_state.id,
-                user_id=self.agent_state.created_by_id,
+                # Base info OpenAI-style
                 model=self.model,
-                openai_message_dict={
-                    "role": "tool",
-                    "name": function_name,
-                    "content": function_response,
-                    "tool_call_id": tool_call_id,
-                },
-                name=self.agent_state.name,
+                role="tool",
+                name=function_name,  # NOTE: when role is 'tool', the 'name' is the function name, not agent name
+                content=[TextContent(text=function_response)],
+                tool_call_id=tool_call_id,
+                # Letta extras
                 tool_returns=tool_returns,
                 group_id=group_id,
             )
@@ -386,6 +384,7 @@ class Agent(BaseAgent):
                     delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
                     warnings.warn(f"Attempt {attempt} failed: {ve}. Retrying in {delay} seconds...")
                     time.sleep(delay)
+                    continue
 
             except Exception as e:
                 # For non-retryable errors, exit immediately
@@ -397,6 +396,7 @@ class Agent(BaseAgent):
                 # trigger summarization
                 log_telemetry(self.logger, "_get_ai_reply summarize_messages_inplace")
                 self.summarize_messages_inplace()
+
         # return the response
         return response
 
@@ -455,7 +455,6 @@ class Agent(BaseAgent):
                 Message.dict_to_message(
                     id=response_message_id,
                     agent_id=self.agent_state.id,
-                    user_id=self.agent_state.created_by_id,
                     model=self.model,
                     openai_message_dict=response_message.model_dump(),
                     name=self.agent_state.name,
@@ -659,17 +658,15 @@ class Agent(BaseAgent):
                 else None
             )
             messages.append(
-                Message.dict_to_message(
+                Message(
                     agent_id=self.agent_state.id,
-                    user_id=self.agent_state.created_by_id,
+                    # Base info OpenAI-style
                     model=self.model,
-                    openai_message_dict={
-                        "role": "tool",
-                        "name": function_name,
-                        "content": function_response,
-                        "tool_call_id": tool_call_id,
-                    },
-                    name=self.agent_state.name,
+                    role="tool",
+                    name=function_name,  # NOTE: when role is 'tool', the 'name' is the function name, not agent name
+                    content=[TextContent(text=function_response)],
+                    tool_call_id=tool_call_id,
+                    # Letta extras
                     tool_returns=[tool_return] if sandbox_run_result else None,
                     group_id=group_id,
                 )
@@ -686,7 +683,6 @@ class Agent(BaseAgent):
                 Message.dict_to_message(
                     id=response_message_id,
                     agent_id=self.agent_state.id,
-                    user_id=self.agent_state.created_by_id,
                     model=self.model,
                     openai_message_dict=response_message.model_dump(),
                     name=self.agent_state.name,
@@ -777,7 +773,6 @@ class Agent(BaseAgent):
             assert self.agent_state.created_by_id is not None
             next_input_message = Message.dict_to_message(
                 agent_id=self.agent_state.id,
-                user_id=self.agent_state.created_by_id,
                 model=self.model,
                 openai_message_dict={
                     "role": "user",  # TODO: change to system?
@@ -789,7 +784,6 @@ class Agent(BaseAgent):
             assert self.agent_state.created_by_id is not None
             next_input_message = Message.dict_to_message(
                 agent_id=self.agent_state.id,
-                user_id=self.agent_state.created_by_id,
                 model=self.model,
                 openai_message_dict={
                     "role": "user",  # TODO: change to system?
@@ -801,7 +795,6 @@ class Agent(BaseAgent):
             assert self.agent_state.created_by_id is not None
             next_input_message = Message.dict_to_message(
                 agent_id=self.agent_state.id,
-                user_id=self.agent_state.created_by_id,
                 model=self.model,
                 openai_message_dict={
                     "role": "user",  # TODO: change to system?
@@ -1057,7 +1050,6 @@ class Agent(BaseAgent):
         assert self.agent_state.created_by_id is not None, "User ID is not set"
         user_message = Message.dict_to_message(
             agent_id=self.agent_state.id,
-            user_id=self.agent_state.created_by_id,
             model=self.model,
             openai_message_dict=openai_message_dict,
             # created_at=timestamp,
@@ -1117,7 +1109,6 @@ class Agent(BaseAgent):
             messages=[
                 Message.dict_to_message(
                     agent_id=self.agent_state.id,
-                    user_id=self.agent_state.created_by_id,
                     model=self.model,
                     openai_message_dict=packed_summary_message,
                 )
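The recurring change in letta/agent.py is that tool results are now built as `Message` objects directly instead of going through `Message.dict_to_message` with an OpenAI-style dict, and the redundant `user_id` argument is dropped everywhere. A minimal sketch of the new construction (ids and values below are placeholders, not taken from the diff):

```python
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import Message

# role="tool" pairs the result with the assistant's tool call via tool_call_id.
tool_result = Message(
    agent_id="agent-xyz",             # placeholder id
    model="gpt-4o",                   # placeholder model name
    role="tool",
    name="send_message",              # the function name, not the agent name
    content=[TextContent(text='{"status": "OK", "message": null}')],
    tool_call_id="call-123",          # placeholder; must match the assistant's tool call
)
```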
letta/agents/base_agent.py
CHANGED
@@ -1,11 +1,13 @@
 from abc import ABC, abstractmethod
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, AsyncGenerator, List, Optional, Union
 
 import openai
 
 from letta.schemas.enums import MessageStreamStatus
-from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, UserMessage
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
+from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
+from letta.schemas.message import MessageCreate
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.message_manager import MessageManager
@@ -33,7 +35,7 @@ class BaseAgent(ABC):
         self.actor = actor
 
     @abstractmethod
-    async def step(self, input_message: UserMessage, max_steps: int = 10) -> LettaResponse:
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         """
         Main execution loop for the agent.
         """
@@ -41,15 +43,24 @@ class BaseAgent(ABC):
 
     @abstractmethod
     async def step_stream(
-        self, input_message: UserMessage, max_steps: int = 10
+        self, input_messages: List[MessageCreate], max_steps: int = 10
     ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         """
         Main streaming execution loop for the agent.
         """
         raise NotImplementedError
 
-    def pre_process_input_message(self, input_message: UserMessage) -> Any:
+    def pre_process_input_message(self, input_messages: List[MessageCreate]) -> Any:
         """
         Pre-process function to run on the input_message.
         """
-        return input_message.model_dump()
+
+        def get_content(message: MessageCreate) -> str:
+            if isinstance(message.content, str):
+                return message.content
+            elif message.content and len(message.content) == 1 and isinstance(message.content[0], TextContent):
+                return message.content[0].text
+            else:
+                return ""
+
+        return [{"role": input_message.role, "content": get_content(input_message)} for input_message in input_messages]
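The new default `pre_process_input_message` flattens a list of `MessageCreate` objects into plain OpenAI-style dicts, handling both supported content shapes. An illustrative sketch (values are made up):

```python
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import MessageCreate

input_messages = [
    MessageCreate(role="user", content="hello"),                    # plain-string content
    MessageCreate(role="user", content=[TextContent(text="hi")]),  # single TextContent part
]
# agent.pre_process_input_message(input_messages) now returns:
#   [{"role": "user", "content": "hello"}, {"role": "user", "content": "hi"}]
```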
letta/agents/ephemeral_agent.py
CHANGED
@@ -5,9 +5,8 @@ import openai
 from letta.agents.base_agent import BaseAgent
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole
-from letta.schemas.letta_message import UserMessage
 from letta.schemas.letta_message_content import TextContent
-from letta.schemas.message import Message
+from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
@@ -37,15 +36,15 @@ class EphemeralAgent(BaseAgent):
             actor=actor,
         )
 
-    async def step(self, input_message: UserMessage) -> List[Message]:
+    async def step(self, input_messages: List[MessageCreate]) -> List[Message]:
         """
         Synchronous method that takes a user's input text and returns a summary from OpenAI.
         Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
         """
         agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
 
-        input_message = self.pre_process_input_message(input_message=input_message)
-        request = self._build_openai_request([input_message], agent_state)
+        openai_messages = self.pre_process_input_message(input_messages=input_messages)
+        request = self._build_openai_request(openai_messages, agent_state)
 
         chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
 
@@ -66,7 +65,7 @@ class EphemeralAgent(BaseAgent):
         )
         return openai_request
 
-    async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
+    async def step_stream(self, input_messages: List[MessageCreate]) -> AsyncGenerator[str, None]:
         """
         This agent is synchronous-only. If called in an async context, raise an error.
         """
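Callers migrate accordingly: where `step` used to take a single `UserMessage`, it now takes a list of `MessageCreate` objects. A hedged sketch of the new call shape (the agent instance is assumed to be constructed elsewhere):

```python
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import MessageCreate

# Inside an async context; `ephemeral_agent` is an EphemeralAgent built elsewhere.
messages = await ephemeral_agent.step(
    [MessageCreate(role="user", content=[TextContent(text="Summarize our conversation")])]
)
```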
letta/agents/ephemeral_memory_agent.py
CHANGED
@@ -7,9 +7,8 @@ from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole
-from letta.schemas.letta_message import UserMessage
 from letta.schemas.letta_message_content import TextContent
-from letta.schemas.message import Message
+from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
@@ -38,15 +37,15 @@ class EphemeralMemoryAgent(BaseAgent):
             actor=actor,
         )
 
-    async def step(self, input_message: UserMessage) -> List[Message]:
+    async def step(self, input_messages: List[MessageCreate]) -> List[Message]:
         """
         Synchronous method that takes a user's input text and returns a summary from OpenAI.
         Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
         """
         agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
 
-        input_message = self.pre_process_input_message(input_message=input_message)
-        request = self._build_openai_request([input_message], agent_state)
+        openai_messages = self.pre_process_input_message(input_messages=input_messages)
+        request = self._build_openai_request(openai_messages, agent_state)
 
         chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
 
@@ -57,7 +56,8 @@ class EphemeralMemoryAgent(BaseAgent):
             )
         ]
 
-    def pre_process_input_message(self, input_message: UserMessage) -> Dict:
+    def pre_process_input_message(self, input_messages: List[MessageCreate]) -> List[Dict]:
+        input_message = input_messages[0]
         input_prompt_augmented = f"""
         You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
         Your response will directly populate a "memory block" called "human" that describes the user, that will be used to answer more questions in the future.
@@ -78,9 +78,7 @@ class EphemeralMemoryAgent(BaseAgent):
         Your response:
         """
 
-
-        # print(input_prompt_augmented)
-        return input_message.model_dump()
+        return [{"role": "user", "content": input_prompt_augmented}]
 
     def _format_messages_llm_friendly(self):
         messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
@@ -107,7 +105,7 @@ class EphemeralMemoryAgent(BaseAgent):
 
         return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
 
-    async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
+    async def step_stream(self, input_messages: List[MessageCreate]) -> AsyncGenerator[str, None]:
         """
         This agent is synchronous-only. If called in an async context, raise an error.
         """
letta/agents/helpers.py
CHANGED
@@ -1,11 +1,11 @@
-from typing import
+from typing import List, Tuple
 
 from letta.schemas.agent import AgentState
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.message import Message
+from letta.schemas.message import Message, MessageCreate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
-from letta.server.rest_api.utils import create_user_message
+from letta.server.rest_api.utils import create_input_messages
 from letta.services.message_manager import MessageManager
 
 
@@ -20,13 +20,13 @@ def _create_letta_response(new_in_context_messages: list[Message], use_assistant
 
 
 def _prepare_in_context_messages(
-    input_message: UserMessage, agent_state: AgentState, message_manager: MessageManager, actor: User
+    input_messages: List[MessageCreate], agent_state: AgentState, message_manager: MessageManager, actor: User
 ) -> Tuple[List[Message], List[Message]]:
     """
     Prepares in-context messages for an agent, based on the current state and a new user input.
 
     Args:
-        input_message (UserMessage): The new user input to process.
+        input_messages (List[MessageCreate]): The new user input messages to process.
         agent_state (AgentState): The current state of the agent, including message buffer config.
         message_manager (MessageManager): The manager used to retrieve and create messages.
         actor (User): The user performing the action, used for access control and attribution.
@@ -46,7 +46,7 @@ def _prepare_in_context_messages(
 
     # Create a new user message from the input and store it
     new_in_context_messages = message_manager.create_many_messages(
-        create_user_message(input_message=input_message, agent_id=agent_state.id, actor=actor), actor=actor
+        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, actor=actor), actor=actor
    )
 
    return current_in_context_messages, new_in_context_messages
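`_prepare_in_context_messages` follows the same migration as the agent classes. A rough usage sketch (the state and manager objects are assumed to be loaded elsewhere, not shown in the diff):

```python
from letta.schemas.message import MessageCreate

current_msgs, new_msgs = _prepare_in_context_messages(
    input_messages=[MessageCreate(role="user", content="What did we decide yesterday?")],
    agent_state=agent_state,          # assumed loaded via AgentManager
    message_manager=message_manager,  # assumed service instance
    actor=actor,                      # assumed User performing the call
)
```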
letta/agents/letta_agent.py
CHANGED
@@ -18,12 +18,11 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.enums import MessageRole, MessageStreamStatus
 from letta.schemas.letta_message import AssistantMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.message import Message, MessageUpdate
-from letta.schemas.openai.chat_completion_request import UserMessage
+from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.openai.chat_completion_response import ToolCall
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_letta_messages_from_llm_response
@@ -60,11 +59,10 @@ class LettaAgent(BaseAgent):
         self.use_assistant_message = use_assistant_message
 
     @trace_method
-    async def step(self, input_message: UserMessage, max_steps: int = 10) -> LettaResponse:
-        input_message = self.pre_process_input_message(input_message)
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         current_in_context_messages, new_in_context_messages = _prepare_in_context_messages(
-            input_message, agent_state, self.message_manager, self.actor
+            input_messages, agent_state, self.message_manager, self.actor
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
@@ -96,16 +94,15 @@ class LettaAgent(BaseAgent):
 
     @trace_method
     async def step_stream(
-        self, input_message: UserMessage, max_steps: int = 10
+        self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = False
     ) -> AsyncGenerator[str, None]:
         """
         Main streaming loop that yields partial tokens.
         Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        input_message = self.pre_process_input_message(input_message)
         agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         current_in_context_messages, new_in_context_messages = _prepare_in_context_messages(
-            input_message, agent_state, self.message_manager, self.actor
+            input_messages, agent_state, self.message_manager, self.actor
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
@@ -362,7 +359,9 @@ class LettaAgent(BaseAgent):
                 f"{message}"
             )
 
-            letta_response = await letta_agent.step(UserMessage(content=augmented_message))
+            letta_response = await letta_agent.step(
+                [MessageCreate(role=MessageRole.system, content=[TextContent(text=augmented_message)])]
+            )
             messages = letta_response.messages
 
             send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
letta/agents/letta_agent_batch.py
ADDED
@@ -0,0 +1,164 @@
+from typing import Dict, List
+
+from letta.agents.helpers import _prepare_in_context_messages
+from letta.helpers import ToolRulesSolver
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.tool_execution_helper import enable_strict_mode
+from letta.llm_api.llm_client import LLMClient
+from letta.log import get_logger
+from letta.orm.enums import ToolType
+from letta.schemas.agent import AgentState, AgentStepState
+from letta.schemas.enums import JobStatus, ProviderType
+from letta.schemas.letta_request import LettaBatchRequest
+from letta.schemas.letta_response import LettaBatchResponse
+from letta.schemas.message import Message, MessageCreate, MessageUpdate
+from letta.schemas.user import User
+from letta.services.agent_manager import AgentManager
+from letta.services.block_manager import BlockManager
+from letta.services.helpers.agent_manager_helper import compile_system_message
+from letta.services.llm_batch_manager import LLMBatchManager
+from letta.services.message_manager import MessageManager
+from letta.services.passage_manager import PassageManager
+from letta.utils import united_diff
+
+logger = get_logger(__name__)
+
+
+# TODO: Limitations ->
+# TODO: Only works with anthropic for now
+class LettaAgentBatch:
+
+    def __init__(
+        self,
+        batch_id: str,
+        message_manager: MessageManager,
+        agent_manager: AgentManager,
+        block_manager: BlockManager,
+        passage_manager: PassageManager,
+        batch_manager: LLMBatchManager,
+        actor: User,
+        use_assistant_message: bool = True,
+        max_steps: int = 10,
+    ):
+        self.batch_id = batch_id
+        self.message_manager = message_manager
+        self.agent_manager = agent_manager
+        self.block_manager = block_manager
+        self.passage_manager = passage_manager
+        self.batch_manager = batch_manager
+        self.use_assistant_message = use_assistant_message
+        self.actor = actor
+        self.max_steps = max_steps
+
+    async def step_until_request(
+        self, batch_requests: List[LettaBatchRequest], agent_step_state_mapping: Dict[str, AgentStepState]
+    ) -> LettaBatchResponse:
+        agent_messages_mapping: Dict[str, List[Message]] = {}
+        agent_tools_mapping: Dict[str, List[dict]] = {}
+        agent_states = []
+
+        for batch_request in batch_requests:
+            agent_id = batch_request.agent_id
+            agent_state = self.agent_manager.get_agent_by_id(agent_id, actor=self.actor)
+            agent_states.append(agent_state)
+            agent_messages_mapping[agent_id] = self.get_in_context_messages_per_agent(
+                agent_state=agent_state, input_messages=batch_request.messages
+            )
+            agent_tools_mapping[agent_id] = self.prepare_tools_per_agent(
+                agent_state, agent_step_state_mapping.get(agent_id).tool_rules_solver
+            )
+
+        # TODO: This is a hack, this is because LLM client expects a LLM config
+        # TODO: But that doesn't really work in batch land
+        # TODO: @caren will factor this out
+        llm_client = LLMClient.create(
+            llm_config=agent_states[0].llm_config,
+            put_inner_thoughts_first=True,
+        )
+        agent_llm_config_mapping = {agent_state.id: agent_state.llm_config for agent_state in agent_states}
+        batch_response = await llm_client.send_llm_batch_request_async(
+            agent_messages_mapping=agent_messages_mapping,
+            agent_tools_mapping=agent_tools_mapping,
+            agent_llm_config_mapping=agent_llm_config_mapping,
+        )
+
+        # Write the response into the jobs table, where it will get picked up by the next cron run
+        batch_job = self.batch_manager.create_batch_job(
+            llm_provider=ProviderType.anthropic,  # TODO: Expand to more
+            create_batch_response=batch_response,
+            actor=self.actor,
+            status=JobStatus.running,
+        )
+
+        # TODO: Make this much more efficient by doing creates in bulk
+        for agent_state in agent_states:
+            agent_step_state = agent_step_state_mapping.get(agent_state.id)
+            self.batch_manager.create_batch_item(
+                batch_id=batch_job.id,
+                agent_id=agent_state.id,
+                llm_config=agent_state.llm_config,
+                actor=self.actor,
+                step_state=agent_step_state,
+            )
+
+        return LettaBatchResponse(
+            batch_id=batch_job.id, status=batch_job.status, last_polled_at=get_utc_time(), created_at=batch_job.created_at
+        )
+
+    async def resume_step_after_request(self, batch_id: str):
+        pass
+
+    def prepare_tools_per_agent(self, agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]:
+        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.CUSTOM, ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+        valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools]))
+        return [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
+
+    def get_in_context_messages_per_agent(self, agent_state: AgentState, input_messages: List[MessageCreate]) -> List[Message]:
+        current_in_context_messages, new_in_context_messages = _prepare_in_context_messages(
+            input_messages, agent_state, self.message_manager, self.actor
+        )
+
+        in_context_messages = self._rebuild_memory(current_in_context_messages + new_in_context_messages, agent_state)
+        return in_context_messages
+
+    # TODO: Make this a bullk function
+    def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
+        agent_state = self.agent_manager.refresh_memory(agent_state=agent_state, actor=self.actor)
+
+        # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
+        curr_system_message = in_context_messages[0]
+        curr_memory_str = agent_state.memory.compile()
+        curr_system_message_text = curr_system_message.content[0].text
+        if curr_memory_str in curr_system_message_text:
+            # NOTE: could this cause issues if a block is removed? (substring match would still work)
+            logger.debug(
+                f"Memory hasn't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
+            )
+            return in_context_messages
+
+        memory_edit_timestamp = get_utc_time()
+
+        num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
+        num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
+
+        new_system_message_str = compile_system_message(
+            system_prompt=agent_state.system,
+            in_context_memory=agent_state.memory,
+            in_context_memory_last_edit=memory_edit_timestamp,
+            previous_message_count=num_messages,
+            archival_memory_size=num_archival_memories,
+        )
+
+        diff = united_diff(curr_system_message_text, new_system_message_str)
+        if len(diff) > 0:
+            logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
+
+            new_system_message = self.message_manager.update_message_by_id(
+                curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
+            )
+
+            # Skip pulling down the agent's memory again to save on a db call
+            return [new_system_message] + in_context_messages[1:]
+
+        else:
+            return in_context_messages
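Reading the new file as a whole: `step_until_request` fans a set of per-agent requests into one provider batch call, records a batch job plus per-agent batch items, and returns a `LettaBatchResponse` to be polled later by the scheduler. A rough usage sketch (the manager wiring and the step-state objects are assumptions, not shown in the diff):

```python
from letta.schemas.letta_request import LettaBatchRequest
from letta.schemas.message import MessageCreate

batch_agent = LettaAgentBatch(
    batch_id="batch-001",             # placeholder id
    message_manager=message_manager,  # assumed service instances, wired elsewhere
    agent_manager=agent_manager,
    block_manager=block_manager,
    passage_manager=passage_manager,
    batch_manager=batch_manager,
    actor=actor,
)

# Inside an async context; each step state carries that agent's ToolRulesSolver.
response = await batch_agent.step_until_request(
    batch_requests=[
        LettaBatchRequest(agent_id="agent-1", messages=[MessageCreate(role="user", content="hi")]),
    ],
    agent_step_state_mapping={"agent-1": step_state_for_agent_1},  # assumed AgentStepState
)
print(response.batch_id, response.status)
```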
letta/agents/voice_agent.py
CHANGED
@@ -19,8 +19,9 @@ from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
 from letta.schemas.block import BlockUpdate
+from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.message import Message, MessageUpdate
+from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.openai.chat_completion_request import (
     AssistantMessage,
     ChatCompletionRequest,
@@ -34,8 +35,8 @@ from letta.schemas.user import User
 from letta.server.rest_api.utils import (
     convert_letta_messages_to_openai,
     create_assistant_messages_from_openai_response,
+    create_input_messages,
     create_letta_messages_from_llm_response,
-    create_user_message,
 )
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
@@ -93,19 +94,18 @@ class VoiceAgent(BaseAgent):
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
         )
 
-    async def step(self, input_message: UserMessage, max_steps: int = 10) -> LettaResponse:
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         raise NotImplementedError("LowLatencyAgent does not have a synchronous step implemented currently.")
 
-    async def step_stream(self, input_message: UserMessage, max_steps: int = 10) -> AsyncGenerator[str, None]:
+    async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = 10) -> AsyncGenerator[str, None]:
         """
         Main streaming loop that yields partial tokens.
         Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        input_message = self.pre_process_input_message(input_message)
         agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         in_context_messages = self.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=self.actor)
-        letta_message_db_queue = [create_user_message(input_message=input_message, agent_id=agent_state.id, actor=self.actor)]
-        in_memory_message_history = [input_message]
+        letta_message_db_queue = [create_input_messages(input_messages=input_messages, agent_id=agent_state.id, actor=self.actor)]
+        in_memory_message_history = self.pre_process_input_message(input_messages)
 
         # TODO: Define max steps here
         for _ in range(max_steps):
@@ -372,7 +372,7 @@ class VoiceAgent(BaseAgent):
             return f"Failed to call tool. Error: {e}", False
 
     async def _recall_memory(self, query, agent_state: AgentState) -> None:
-        results = await self.offline_memory_agent.step(UserMessage(content=query))
+        results = await self.offline_memory_agent.step([MessageCreate(role="user", content=[TextContent(text=query)])])
         target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
         self.block_manager.update_block(
             block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
letta/functions/function_sets/base.py
CHANGED
@@ -226,7 +226,7 @@ def core_memory_insert(agent_state: "AgentState", target_block_label: str, new_m
     if line_number is None:
         line_number = len(current_value_list)
     if replace:
-        current_value_list[line_number] = new_memory
+        current_value_list[line_number - 1] = new_memory
     else:
         current_value_list.insert(line_number, new_memory)
     new_value = "\n".join(current_value_list)
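The one-line `core_memory_insert` fix above makes `replace` honor the 1-indexed line numbers the function presents to the caller; previously it overwrote the line after the requested one and indexed past the end when the last line was targeted. A standalone illustration of the corrected indexing (not letta code):

```python
current_value_list = ["alpha", "beta", "gamma"]
line_number = 2          # user-facing, 1-indexed
new_memory = "BETA"

# Old behavior: current_value_list[line_number] would have replaced "gamma".
current_value_list[line_number - 1] = new_memory
print("\n".join(current_value_list))  # alpha / BETA / gamma
```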