letta-nightly 0.8.5.dev20250625104328__py3-none-any.whl → 0.8.6.dev20250626104326__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +16 -12
- letta/agents/base_agent.py +4 -1
- letta/agents/helpers.py +35 -3
- letta/agents/letta_agent.py +132 -106
- letta/agents/letta_agent_batch.py +4 -3
- letta/agents/voice_agent.py +12 -2
- letta/agents/voice_sleeptime_agent.py +12 -2
- letta/constants.py +24 -3
- letta/data_sources/redis_client.py +6 -0
- letta/errors.py +5 -0
- letta/functions/function_sets/files.py +10 -3
- letta/functions/function_sets/multi_agent.py +0 -32
- letta/groups/sleeptime_multi_agent_v2.py +6 -0
- letta/helpers/converters.py +4 -1
- letta/helpers/datetime_helpers.py +16 -23
- letta/helpers/message_helper.py +5 -2
- letta/helpers/tool_rule_solver.py +29 -2
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +11 -1
- letta/llm_api/anthropic_client.py +14 -3
- letta/llm_api/aws_bedrock.py +29 -15
- letta/llm_api/bedrock_client.py +74 -0
- letta/llm_api/google_ai_client.py +7 -3
- letta/llm_api/google_vertex_client.py +18 -4
- letta/llm_api/llm_client.py +7 -0
- letta/llm_api/openai_client.py +13 -0
- letta/orm/agent.py +5 -0
- letta/orm/block_history.py +1 -1
- letta/orm/enums.py +6 -25
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -1
- letta/orm/mcp_server.py +1 -1
- letta/orm/passage.py +7 -1
- letta/orm/sqlalchemy_base.py +7 -5
- letta/orm/tool.py +2 -1
- letta/schemas/agent.py +34 -10
- letta/schemas/enums.py +42 -1
- letta/schemas/job.py +6 -3
- letta/schemas/letta_request.py +4 -0
- letta/schemas/llm_batch_job.py +7 -2
- letta/schemas/memory.py +2 -2
- letta/schemas/providers.py +32 -6
- letta/schemas/run.py +1 -1
- letta/schemas/tool_rule.py +40 -12
- letta/serialize_schemas/pydantic_agent_schema.py +9 -2
- letta/server/rest_api/app.py +3 -2
- letta/server/rest_api/routers/v1/agents.py +25 -22
- letta/server/rest_api/routers/v1/runs.py +2 -3
- letta/server/rest_api/routers/v1/sources.py +31 -0
- letta/server/rest_api/routers/v1/voice.py +1 -0
- letta/server/rest_api/utils.py +38 -13
- letta/server/server.py +52 -21
- letta/services/agent_manager.py +58 -7
- letta/services/block_manager.py +1 -1
- letta/services/file_processor/chunker/line_chunker.py +2 -1
- letta/services/file_processor/file_processor.py +2 -9
- letta/services/files_agents_manager.py +177 -37
- letta/services/helpers/agent_manager_helper.py +77 -48
- letta/services/helpers/tool_parser_helper.py +2 -1
- letta/services/job_manager.py +33 -2
- letta/services/llm_batch_manager.py +1 -1
- letta/services/provider_manager.py +6 -4
- letta/services/tool_executor/core_tool_executor.py +1 -1
- letta/services/tool_executor/files_tool_executor.py +99 -30
- letta/services/tool_executor/multi_agent_tool_executor.py +1 -17
- letta/services/tool_executor/tool_execution_manager.py +6 -0
- letta/services/tool_executor/tool_executor_base.py +3 -0
- letta/services/tool_sandbox/base.py +39 -1
- letta/services/tool_sandbox/e2b_sandbox.py +7 -0
- letta/services/user_manager.py +3 -2
- letta/settings.py +8 -14
- letta/system.py +17 -17
- letta/templates/sandbox_code_file_async.py.j2 +59 -0
- {letta_nightly-0.8.5.dev20250625104328.dist-info → letta_nightly-0.8.6.dev20250626104326.dist-info}/METADATA +3 -2
- {letta_nightly-0.8.5.dev20250625104328.dist-info → letta_nightly-0.8.6.dev20250626104326.dist-info}/RECORD +78 -76
- {letta_nightly-0.8.5.dev20250625104328.dist-info → letta_nightly-0.8.6.dev20250626104326.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.5.dev20250625104328.dist-info → letta_nightly-0.8.6.dev20250626104326.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.5.dev20250625104328.dist-info → letta_nightly-0.8.6.dev20250626104326.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
@@ -255,7 +255,7 @@ class Agent(BaseAgent):
                 self.tool_rules_solver.register_tool_call(function_name)
 
                 # Extend conversation with function response
-                function_response = package_function_response(False, error_msg)
+                function_response = package_function_response(False, error_msg, self.agent_state.timezone)
                 new_message = Message(
                     agent_id=self.agent_state.id,
                     # Base info OpenAI-style
@@ -640,7 +640,7 @@ class Agent(BaseAgent):
                     function_response, return_char_limit=return_char_limit, truncate=truncate
                 )
                 function_args.pop("self", None)
-                function_response = package_function_response(True, function_response_string)
+                function_response = package_function_response(True, function_response_string, self.agent_state.timezone)
                 function_failed = False
             except Exception as e:
                 function_args.pop("self", None)
@@ -763,7 +763,7 @@ class Agent(BaseAgent):
             self.tool_rules_solver.clear_tool_history()
 
         # Convert MessageCreate objects to Message objects
-        next_input_messages = convert_message_creates_to_messages(input_messages, self.agent_state.id)
+        next_input_messages = convert_message_creates_to_messages(input_messages, self.agent_state.id, self.agent_state.timezone)
         counter = 0
         total_usage = UsageStatistics()
         step_count = 0
@@ -823,7 +823,7 @@ class Agent(BaseAgent):
                         model=self.model,
                         openai_message_dict={
                             "role": "user",  # TODO: change to system?
-                            "content": get_heartbeat(FUNC_FAILED_HEARTBEAT_MESSAGE),
+                            "content": get_heartbeat(self.agent_state.timezone, FUNC_FAILED_HEARTBEAT_MESSAGE),
                         },
                     )
                 ]
@@ -836,7 +836,7 @@ class Agent(BaseAgent):
                         model=self.model,
                         openai_message_dict={
                             "role": "user",  # TODO: change to system?
-                            "content": get_heartbeat(REQ_HEARTBEAT_MESSAGE),
+                            "content": get_heartbeat(self.agent_state.timezone, REQ_HEARTBEAT_MESSAGE),
                         },
                     )
                 ]
@@ -1000,11 +1000,12 @@ class Agent(BaseAgent):
         )
         if job_id:
             for message in all_new_messages:
-                self.job_manager.add_message_to_job(
-                    job_id=job_id,
-                    message_id=message.id,
-                    actor=self.user,
-                )
+                if message.role != "user":
+                    self.job_manager.add_message_to_job(
+                        job_id=job_id,
+                        message_id=message.id,
+                        actor=self.user,
+                    )
 
         return AgentStepResponse(
             messages=all_new_messages,
@@ -1079,7 +1080,7 @@ class Agent(BaseAgent):
         assert user_message_str and isinstance(
             user_message_str, str
         ), f"user_message_str should be a non-empty string, got {type(user_message_str)}"
-        user_message_json_str = package_user_message(user_message_str)
+        user_message_json_str = package_user_message(user_message_str, self.agent_state.timezone)
 
         # Validate JSON via save/load
         user_message = validate_json(user_message_json_str)
@@ -1142,7 +1143,9 @@ class Agent(BaseAgent):
         remaining_message_count = 1 + len(in_context_messages) - cutoff  # System + remaining
         hidden_message_count = all_time_message_count - remaining_message_count
         summary_message_count = len(message_sequence_to_summarize)
-        summary_message = package_summarize_message(summary, summary_message_count, hidden_message_count, all_time_message_count)
+        summary_message = package_summarize_message(
+            summary, summary_message_count, hidden_message_count, all_time_message_count, self.agent_state.timezone
+        )
         logger.info(f"Packaged into message: {summary_message}")
 
         prior_len = len(in_context_messages_openai)
@@ -1243,6 +1246,7 @@ class Agent(BaseAgent):
         message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
         external_memory_summary = compile_memory_metadata_block(
             memory_edit_timestamp=get_utc_time(),
+            timezone=self.agent_state.timezone,
             previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
             archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
         )
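
The common thread in the letta/agent.py hunks is that the message-packaging helpers in letta.system now take the agent's timezone, so packaged payloads can carry a timestamp localized to the agent rather than raw UTC. A minimal sketch of the call shape, assuming a payload with status/message/time keys (the key names and strftime format are illustrative guesses, not copied from letta.system.package_function_response):

# Illustrative sketch only; the payload keys and timestamp format are
# assumptions, not the shipped implementation.
import json
from datetime import datetime
from zoneinfo import ZoneInfo


def package_function_response_sketch(was_success: bool, response_string: str, timezone: str) -> str:
    """Wrap a tool result with a status flag and a timezone-aware timestamp."""
    return json.dumps(
        {
            "status": "OK" if was_success else "Failed",
            "message": response_string,
            "time": datetime.now(ZoneInfo(timezone)).strftime("%Y-%m-%d %I:%M:%S %p %Z%z"),
        }
    )


print(package_function_response_sketch(True, "done", "America/Los_Angeles"))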
letta/agents/base_agent.py
CHANGED
@@ -50,7 +50,9 @@ class BaseAgent(ABC):
         self.logger = get_logger(agent_id)
 
     @abstractmethod
-    async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
+    async def step(
+        self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS, run_id: Optional[str] = None
+    ) -> LettaResponse:
         """
         Main execution loop for the agent.
         """
@@ -118,6 +120,7 @@ class BaseAgent(ABC):
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
+            timezone=agent_state.timezone,
             previous_message_count=num_messages - len(in_context_messages),
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
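
Since step() is abstract, every concrete agent has to pick up the widened signature. A minimal conforming override, shown only as a sketch (NoopAgent is hypothetical; real subclasses thread run_id through to job bookkeeping, as the letta_agent.py diff below shows):

from typing import List, Optional

from letta.agents.base_agent import BaseAgent
from letta.constants import DEFAULT_MAX_STEPS
from letta.schemas.letta_response import LettaResponse
from letta.schemas.message import MessageCreate


class NoopAgent(BaseAgent):
    # Hypothetical subclass for illustration only.
    async def step(
        self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS, run_id: Optional[str] = None
    ) -> LettaResponse:
        raise NotImplementedError  # real agents drive the LLM loop here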
letta/agents/helpers.py
CHANGED
@@ -1,12 +1,15 @@
+import json
 import uuid
 import xml.etree.ElementTree as ET
 from typing import List, Optional, Tuple
 
+from letta.helpers import ToolRulesSolver
 from letta.schemas.agent import AgentState
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_input_messages
@@ -69,7 +72,8 @@ def _prepare_in_context_messages(
 
     # Create a new user message from the input and store it
     new_in_context_messages = message_manager.create_many_messages(
-        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, actor=actor), actor=actor
+        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
+        actor=actor,
     )
 
     return current_in_context_messages, new_in_context_messages
@@ -106,7 +110,8 @@ async def _prepare_in_context_messages_async(
 
     # Create a new user message from the input and store it
     new_in_context_messages = await message_manager.create_many_messages_async(
-        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, actor=actor), actor=actor
+        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
+        actor=actor,
     )
 
     return current_in_context_messages, new_in_context_messages
@@ -141,7 +146,9 @@ async def _prepare_in_context_messages_no_persist_async(
     current_in_context_messages = await message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=actor)
 
     # Create a new user message from the input but dont store it yet
-    new_in_context_messages = create_input_messages(input_messages=input_messages, agent_id=agent_state.id, actor=actor)
+    new_in_context_messages = create_input_messages(
+        input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor
+    )
 
     return current_in_context_messages, new_in_context_messages
 
@@ -201,3 +208,28 @@ def deserialize_message_history(xml_str: str) -> Tuple[List[str], str]:
 
 def generate_step_id():
     return f"step-{uuid.uuid4()}"
+
+
+def _safe_load_dict(raw: str) -> dict:
+    """Lenient JSON → dict with fallback to eval on assertion failure."""
+    if "}{" in raw:  # strip accidental parallel calls
+        raw = raw.split("}{", 1)[0] + "}"
+    try:
+        data = json.loads(raw)
+        if not isinstance(data, dict):
+            raise AssertionError
+        return data
+    except (json.JSONDecodeError, AssertionError):
+        return json.loads(raw) if raw else {}
+
+
+def _pop_heartbeat(tool_args: dict) -> bool:
+    hb = tool_args.pop("request_heartbeat", False)
+    return str(hb).lower() == "true" if isinstance(hb, str) else bool(hb)
+
+
+def _build_rule_violation_result(tool_name: str, valid: list[str], solver: ToolRulesSolver) -> ToolExecutionResult:
+    hint_lines = solver.guess_rule_violation(tool_name)
+    hint_txt = ("\n** Hint: Possible rules that were violated:\n" + "\n".join(f"\t- {h}" for h in hint_lines)) if hint_lines else ""
+    msg = f"[ToolConstraintError] Cannot call {tool_name}, " f"valid tools include: {valid}.{hint_txt}"
+    return ToolExecutionResult(status="error", func_return=msg)
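
A quick illustration of how the two new parsing helpers behave on a messy model output; the input string is made up, but the results follow directly from the function bodies above:

# Hypothetical input: two JSON objects accidentally concatenated, with the
# heartbeat flag emitted as a string instead of a bool.
raw = '{"city": "SF", "request_heartbeat": "True"}{"city": "LA"}'

args = _safe_load_dict(raw)  # keeps only the first object
hb = _pop_heartbeat(args)    # coerces "True" -> True and removes the key
assert args == {"city": "SF"} and hb is True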
letta/agents/letta_agent.py
CHANGED
@@ -10,8 +10,15 @@ from opentelemetry.trace import Span
 
 from letta.agents.base_agent import BaseAgent
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
-from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_no_persist_async, generate_step_id
-from letta.constants import DEFAULT_MAX_STEPS
+from letta.agents.helpers import (
+    _build_rule_violation_result,
+    _create_letta_response,
+    _pop_heartbeat,
+    _prepare_in_context_messages_no_persist_async,
+    _safe_load_dict,
+    generate_step_id,
+)
+from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
@@ -27,7 +34,7 @@ from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState, UpdateAgent
-from letta.schemas.enums import MessageRole
+from letta.schemas.enums import MessageRole, ProviderType
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
@@ -43,6 +50,7 @@ from letta.server.rest_api.utils import create_letta_messages_from_llm_response
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
 from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
+from letta.services.job_manager import JobManager
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.step_manager import NoopStepManager, StepManager
@@ -55,8 +63,6 @@ from letta.system import package_function_response
 from letta.types import JsonDict
 from letta.utils import log_telemetry, validate_function_response
 
-logger = get_logger(__name__)
-
 
 class LettaAgent(BaseAgent):
 
@@ -66,6 +72,7 @@ class LettaAgent(BaseAgent):
         message_manager: MessageManager,
         agent_manager: AgentManager,
         block_manager: BlockManager,
+        job_manager: JobManager,
         passage_manager: PassageManager,
         actor: User,
         step_manager: StepManager = NoopStepManager(),
@@ -81,6 +88,7 @@ class LettaAgent(BaseAgent):
         # TODO: Make this more general, factorable
         # Summarizer settings
         self.block_manager = block_manager
+        self.job_manager = job_manager
         self.passage_manager = passage_manager
         self.step_manager = step_manager
         self.telemetry_manager = telemetry_manager
@@ -95,6 +103,7 @@ class LettaAgent(BaseAgent):
         self.summarization_agent = None
         self.summary_block_label = summary_block_label
         self.max_summarization_retries = max_summarization_retries
+        self.logger = get_logger(agent_id)
 
         # TODO: Expand to more
         if enable_summarization and model_settings.openai_api_key:
@@ -120,6 +129,7 @@ class LettaAgent(BaseAgent):
         self,
         input_messages: List[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
+        run_id: Optional[str] = None,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
         include_return_message_types: Optional[List[MessageType]] = None,
@@ -127,10 +137,11 @@ class LettaAgent(BaseAgent):
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
-        _, new_in_context_messages,
+        _, new_in_context_messages, stop_reason, usage = await self._step(
             agent_state=agent_state,
             input_messages=input_messages,
             max_steps=max_steps,
+            run_id=run_id,
             request_start_timestamp_ns=request_start_timestamp_ns,
         )
         return _create_letta_response(
@@ -193,7 +204,6 @@ class LettaAgent(BaseAgent):
         response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
         # update usage
-        # TODO: add run_id
         usage.step_count += 1
         usage.completion_tokens += response.usage.completion_tokens
         usage.prompt_tokens += response.usage.prompt_tokens
@@ -219,7 +229,7 @@ class LettaAgent(BaseAgent):
         elif response.choices[0].message.content:
             reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
         else:
-            logger.info("No reasoning content found.")
+            self.logger.info("No reasoning content found.")
             reasoning = None
 
         persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
@@ -233,8 +243,11 @@ class LettaAgent(BaseAgent):
             agent_step_span=agent_step_span,
             is_final_step=(i == max_steps - 1),
         )
-
-
+
+            # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
+            new_message_idx = len(initial_messages) if initial_messages else 0
+            self.response_messages.extend(persisted_messages[new_message_idx:])
+            new_in_context_messages.extend(persisted_messages[new_message_idx:])
             initial_messages = None
             log_event("agent.stream_no_tokens.llm_response.processed")  # [4^]
 
@@ -266,7 +279,7 @@ class LettaAgent(BaseAgent):
                 if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield f"data: {message.model_dump_json()}\n\n"
 
-            MetricRegistry().step_execution_time_ms_histogram.record(
+            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
 
             if not should_continue:
                 break
@@ -302,6 +315,7 @@ class LettaAgent(BaseAgent):
         agent_state: AgentState,
         input_messages: List[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
+        run_id: Optional[str] = None,
         request_start_timestamp_ns: Optional[int] = None,
     ) -> Tuple[List[Message], List[Message], Optional[LettaStopReason], LettaUsageStatistics]:
         """
@@ -345,11 +359,11 @@ class LettaAgent(BaseAgent):
 
         response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
-        # TODO: add run_id
         usage.step_count += 1
         usage.completion_tokens += response.usage.completion_tokens
         usage.prompt_tokens += response.usage.prompt_tokens
         usage.total_tokens += response.usage.total_tokens
+        usage.run_ids = [run_id] if run_id else None
         MetricRegistry().message_output_tokens.record(
             response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
         )
@@ -371,7 +385,7 @@ class LettaAgent(BaseAgent):
         elif response.choices[0].message.omitted_reasoning_content:
             reasoning = [OmittedReasoningContent()]
         else:
-            logger.info("No reasoning content found.")
+            self.logger.info("No reasoning content found.")
             reasoning = None
 
         persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
@@ -385,9 +399,12 @@ class LettaAgent(BaseAgent):
             initial_messages=initial_messages,
             agent_step_span=agent_step_span,
             is_final_step=(i == max_steps - 1),
+            run_id=run_id,
         )
-
-
+        new_message_idx = len(initial_messages) if initial_messages else 0
+        self.response_messages.extend(persisted_messages[new_message_idx:])
+        new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
         initial_messages = None
         log_event("agent.step.llm_response.processed")  # [4^]
 
@@ -435,7 +452,7 @@ class LettaAgent(BaseAgent):
             force=False,
         )
 
-        return current_in_context_messages, new_in_context_messages,
+        return current_in_context_messages, new_in_context_messages, stop_reason, usage
 
     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
         try:
@@ -445,7 +462,7 @@ class LettaAgent(BaseAgent):
                 actor=self.actor,
             )
         except Exception as e:
-            logger.error(f"Failed to update agent's last run metrics: {e}")
+            self.logger.error(f"Failed to update agent's last run metrics: {e}")
 
     @trace_method
     async def step_stream(
@@ -512,12 +529,12 @@ class LettaAgent(BaseAgent):
 
         # TODO: THIS IS INCREDIBLY UGLY
         # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
-        if agent_state.llm_config.model_endpoint_type
+        if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
             interface = AnthropicStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
             )
-        elif agent_state.llm_config.model_endpoint_type ==
+        elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
             interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
@@ -590,8 +607,10 @@ class LettaAgent(BaseAgent):
                 agent_step_span=agent_step_span,
                 is_final_step=(i == max_steps - 1),
             )
-
-
+            new_message_idx = len(initial_messages) if initial_messages else 0
+            self.response_messages.extend(persisted_messages[new_message_idx:])
+            new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
             initial_messages = None
 
             # log total step time
@@ -916,47 +935,18 @@ class LettaAgent(BaseAgent):
         initial_messages: Optional[List[Message]] = None,
         agent_step_span: Optional["Span"] = None,
         is_final_step: Optional[bool] = None,
+        run_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, Optional[LettaStopReason]]:
         """
-
-
-        At the end, set self._continue_execution accordingly.
+        Handle the final AI response once streaming completes, execute / validate the
+        tool call, decide whether we should keep stepping, and persist state.
         """
-
-
-
-
-
-
-        if "}{" in tool_call_args_str:
-            tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
-
-        try:
-            tool_args = json.loads(tool_call_args_str)
-            assert isinstance(tool_args, dict), "tool_args must be a dict"
-        except json.JSONDecodeError:
-            tool_args = {}
-        except AssertionError:
-            tool_args = json.loads(tool_args)
-
-        # Get request heartbeats and coerce to bool
-        request_heartbeat = tool_args.pop("request_heartbeat", False)
-        if is_final_step:
-            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
-            logger.info("Agent has reached max steps.")
-            request_heartbeat = False
-        else:
-            # Pre-emptively pop out inner_thoughts
-            tool_args.pop(INNER_THOUGHTS_KWARG, "")
-
-            # So this is necessary, because sometimes non-structured outputs makes mistakes
-            if not isinstance(request_heartbeat, bool):
-                if isinstance(request_heartbeat, str):
-                    request_heartbeat = request_heartbeat.lower() == "true"
-                else:
-                    request_heartbeat = bool(request_heartbeat)
-
-        tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+        # 1. Parse and validate the tool-call envelope
+        tool_call_name: str = tool_call.function.name
+        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+        tool_args = _safe_load_dict(tool_call.function.arguments)
+        request_heartbeat: bool = _pop_heartbeat(tool_args)
+        tool_args.pop(INNER_THOUGHTS_KWARG, None)
 
         log_telemetry(
             self.logger,
@@ -966,16 +956,11 @@ class LettaAgent(BaseAgent):
             tool_call_id=tool_call_id,
             request_heartbeat=request_heartbeat,
         )
-        # Check if tool rule is violated - if so, we'll force continuation
-        tool_rule_violated = tool_call_name not in valid_tool_names
 
+        # 2. Execute the tool (or synthesize an error result if disallowed)
+        tool_rule_violated = tool_call_name not in valid_tool_names
         if tool_rule_violated:
-
-            violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
-            if violated_rule_messages:
-                bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
-                base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
-            tool_execution_result = ToolExecutionResult(status="error", func_return=base_error_message)
+            tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
         else:
             tool_execution_result = await self._execute_tool(
                 tool_name=tool_call_name,
@@ -984,51 +969,38 @@ class LettaAgent(BaseAgent):
                 agent_step_span=agent_step_span,
                 step_id=step_id,
             )
+
         log_telemetry(
             self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
         )
 
-
-
-
-
-
-
-        truncate = True
-
-        # get the function response limit
-        target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
-        return_char_limit = target_tool.return_char_limit if target_tool else None
+        # 3. Prepare the function-response payload
+        truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+        return_char_limit = next(
+            (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+            None,
+        )
         function_response_string = validate_function_response(
-            tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
+            tool_execution_result.func_return,
+            return_char_limit=return_char_limit,
+            truncate=truncate,
         )
-
+        self.last_function_response = package_function_response(
             was_success=tool_execution_result.success_flag,
             response_string=function_response_string,
+            timezone=agent_state.timezone,
         )
 
-        # 4.
-
-
-
-
-
-
-
-        tool_rules_solver.register_tool_call(tool_name=tool_call_name)
-        if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
-            if continue_stepping:
-                stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
-            continue_stepping = False
-        elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
-            continue_stepping = True
-        elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
-            continue_stepping = True
+        # 4. Decide whether to keep stepping (<<< focal section simplified)
+        continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+            request_heartbeat=request_heartbeat,
+            tool_call_name=tool_call_name,
+            tool_rule_violated=tool_rule_violated,
+            tool_rules_solver=tool_rules_solver,
+            is_final_step=is_final_step,
+        )
 
-        #
-        # Following agent loop to persist this before messages
-        # TODO (cliandy): determine what should match old loop w/provider_id, job_id
-        # TODO (cliandy): UsageStatistics and LettaUsageStatistics are used in many places, but are not the same.
+        # 5. Persist step + messages and propagate to jobs
         logged_step = await self.step_manager.log_step_async(
             actor=self.actor,
             agent_id=agent_state.id,
@@ -1039,11 +1011,10 @@ class LettaAgent(BaseAgent):
             context_window_limit=agent_state.llm_config.context_window,
             usage=usage,
             provider_id=None,
-            job_id=
+            job_id=run_id,
             step_id=step_id,
         )
 
-        # 5b. Persist Messages to DB
        tool_call_messages = create_letta_messages_from_llm_response(
            agent_id=agent_state.id,
            model=agent_state.llm_config.model,
@@ -1053,20 +1024,74 @@ class LettaAgent(BaseAgent):
             tool_call_id=tool_call_id,
             function_call_success=tool_execution_result.success_flag,
             function_response=function_response_string,
+            timezone=agent_state.timezone,
             actor=self.actor,
-
+            continue_stepping=continue_stepping,
+            heartbeat_reason=heartbeat_reason,
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-            step_id=logged_step.id if logged_step else None,
+            step_id=logged_step.id if logged_step else None,
         )
 
         persisted_messages = await self.message_manager.create_many_messages_async(
             (initial_messages or []) + tool_call_messages, actor=self.actor
         )
-
+
+        if run_id:
+            await self.job_manager.add_messages_to_job_async(
+                job_id=run_id,
+                message_ids=[m.id for m in persisted_messages if m.role != "user"],
+                actor=self.actor,
+            )
 
         return persisted_messages, continue_stepping, stop_reason
 
+    def _decide_continuation(
+        self,
+        request_heartbeat: bool,
+        tool_call_name: str,
+        tool_rule_violated: bool,
+        tool_rules_solver: ToolRulesSolver,
+        is_final_step: bool | None,
+    ) -> tuple[bool, str | None, LettaStopReason | None]:
+
+        continue_stepping = request_heartbeat
+        heartbeat_reason: str | None = None
+        stop_reason: LettaStopReason | None = None
+
+        if tool_rule_violated:
+            continue_stepping = True
+            heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: tool rule violation."
+        else:
+            tool_rules_solver.register_tool_call(tool_call_name)
+
+            if tool_rules_solver.is_terminal_tool(tool_call_name):
+                if continue_stepping:
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                continue_stepping = False
+
+            elif tool_rules_solver.has_children_tools(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: child tool rule."
+
+            elif tool_rules_solver.is_continue_tool(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: continue tool rule."
+
+        # – hard stop overrides –
+        if is_final_step:
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
+        else:
+            uncalled = tool_rules_solver.get_uncalled_required_tools()
+            if not continue_stepping and uncalled:
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Missing required tools: " f"{', '.join(uncalled)}"
+
+                stop_reason = None  # reset – we’re still going
+
+        return continue_stepping, heartbeat_reason, stop_reason
+
     @trace_method
     async def _execute_tool(
         self,
@@ -1102,6 +1127,7 @@ class LettaAgent(BaseAgent):
             message_manager=self.message_manager,
             agent_manager=self.agent_manager,
             block_manager=self.block_manager,
+            job_manager=self.job_manager,
             passage_manager=self.passage_manager,
             sandbox_env_vars=sandbox_env_vars,
             actor=self.actor,
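
For readers skimming the _decide_continuation hunk above, here is a distilled model of the policy it encodes. This is an illustration rather than the shipped method: it collapses the child-tool and continue-tool rule branches into a single forces_continue flag and drops the heartbeat_reason/stop_reason plumbing:

def continuation_policy_sketch(
    request_heartbeat: bool,
    tool_rule_violated: bool,
    is_terminal_tool: bool,
    forces_continue: bool,  # stands in for the child-tool / continue-tool rules
    is_final_step: bool,
    has_uncalled_required: bool,
) -> bool:
    """Return True when the agent loop should take another step."""
    cont = request_heartbeat
    if tool_rule_violated:
        cont = True  # retry after a constraint error
    elif is_terminal_tool:
        cont = False  # a terminal tool rule ends the loop
    elif forces_continue:
        cont = True
    if is_final_step:
        return False  # max_steps overrides everything
    return cont or has_uncalled_required


assert continuation_policy_sketch(False, True, False, False, False, False)      # violation -> retry
assert not continuation_policy_sketch(True, False, True, False, False, False)   # terminal rule wins
assert not continuation_policy_sketch(True, False, False, True, True, False)    # final step wins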