letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +16 -12
- letta/agents/base_agent.py +1 -1
- letta/agents/helpers.py +13 -2
- letta/agents/letta_agent.py +72 -34
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +19 -13
- letta/agents/voice_sleeptime_agent.py +23 -6
- letta/constants.py +18 -0
- letta/data_sources/__init__.py +0 -0
- letta/data_sources/redis_client.py +282 -0
- letta/errors.py +0 -4
- letta/functions/function_sets/files.py +58 -0
- letta/functions/schema_generator.py +18 -1
- letta/groups/sleeptime_multi_agent_v2.py +13 -3
- letta/helpers/datetime_helpers.py +47 -3
- letta/helpers/decorators.py +69 -0
- letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
- letta/interfaces/anthropic_streaming_interface.py +43 -24
- letta/interfaces/openai_streaming_interface.py +21 -19
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/anthropic_client.py +30 -16
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +36 -30
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client_base.py +29 -1
- letta/llm_api/openai.py +1 -1
- letta/llm_api/openai_client.py +6 -8
- letta/local_llm/chat_completion_proxy.py +1 -1
- letta/memory.py +1 -1
- letta/orm/enums.py +1 -0
- letta/orm/file.py +80 -3
- letta/orm/files_agents.py +13 -0
- letta/orm/passage.py +2 -0
- letta/orm/sqlalchemy_base.py +34 -11
- letta/otel/__init__.py +0 -0
- letta/otel/context.py +25 -0
- letta/otel/events.py +0 -0
- letta/otel/metric_registry.py +122 -0
- letta/otel/metrics.py +66 -0
- letta/otel/resource.py +26 -0
- letta/{tracing.py → otel/tracing.py} +55 -78
- letta/plugins/README.md +22 -0
- letta/plugins/__init__.py +0 -0
- letta/plugins/defaults.py +11 -0
- letta/plugins/plugins.py +72 -0
- letta/schemas/enums.py +8 -0
- letta/schemas/file.py +12 -0
- letta/schemas/letta_request.py +6 -0
- letta/schemas/passage.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +7 -7
- letta/server/rest_api/app.py +8 -6
- letta/server/rest_api/routers/v1/agents.py +46 -37
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/sources.py +26 -3
- letta/server/rest_api/routers/v1/tools.py +7 -2
- letta/server/rest_api/utils.py +9 -6
- letta/server/server.py +25 -13
- letta/services/agent_manager.py +186 -194
- letta/services/block_manager.py +1 -1
- letta/services/context_window_calculator/context_window_calculator.py +1 -1
- letta/services/context_window_calculator/token_counter.py +3 -2
- letta/services/file_processor/chunker/line_chunker.py +34 -0
- letta/services/file_processor/file_processor.py +43 -12
- letta/services/file_processor/parser/mistral_parser.py +11 -1
- letta/services/files_agents_manager.py +96 -7
- letta/services/group_manager.py +6 -6
- letta/services/helpers/agent_manager_helper.py +404 -3
- letta/services/identity_manager.py +1 -1
- letta/services/job_manager.py +1 -1
- letta/services/llm_batch_manager.py +1 -1
- letta/services/mcp/stdio_client.py +5 -1
- letta/services/mcp_manager.py +4 -4
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +604 -19
- letta/services/per_agent_lock_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +178 -19
- letta/services/step_manager.py +2 -2
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/telemetry_manager.py +1 -1
- letta/services/tool_executor/builtin_tool_executor.py +117 -0
- letta/services/tool_executor/composio_tool_executor.py +53 -0
- letta/services/tool_executor/core_tool_executor.py +474 -0
- letta/services/tool_executor/files_tool_executor.py +138 -0
- letta/services/tool_executor/mcp_tool_executor.py +45 -0
- letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
- letta/services/tool_executor/tool_execution_manager.py +34 -14
- letta/services/tool_executor/tool_execution_sandbox.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -802
- letta/services/tool_executor/tool_executor_base.py +43 -0
- letta/services/tool_manager.py +55 -59
- letta/services/tool_sandbox/e2b_sandbox.py +1 -1
- letta/services/tool_sandbox/local_sandbox.py +6 -3
- letta/services/user_manager.py +6 -3
- letta/settings.py +23 -2
- letta/utils.py +7 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -41,6 +41,7 @@ from letta.log import get_logger
 from letta.memory import summarize_messages
 from letta.orm import User
 from letta.orm.enums import ToolType
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import BlockUpdate
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -69,10 +70,9 @@ from letta.services.step_manager import StepManager
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
 from letta.services.tool_manager import ToolManager
-from letta.settings import settings, summarizer_settings
+from letta.settings import settings, summarizer_settings
 from letta.streaming_interface import StreamingRefreshCLIInterface
 from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
-from letta.tracing import log_event, trace_method
 from letta.utils import count_tokens, get_friendly_error_msg, get_tool_call_id, log_telemetry, parse_json, validate_function_response

 logger = get_logger(__name__)
@@ -503,7 +503,7 @@ class Agent(BaseAgent):
             response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
         )
         function_name = function_call.name
-        self.logger.
+        self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")

         # Failure case 1: function name is wrong (not in agent_state.tools)
         target_letta_tool = None
@@ -1282,7 +1282,7 @@ class Agent(BaseAgent):
         )

     async def get_context_window_async(self) -> ContextWindowOverview:
-        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION"
+        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION":
             return await self.get_context_window_from_anthropic_async()
         return await self.get_context_window_from_tiktoken_async()

@@ -1291,8 +1291,8 @@ class Agent(BaseAgent):
         # Grab the in-context messages
         # conversion of messages to OpenAI dict format, which is passed to the token counter
         (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
-            self.
-            self.passage_manager.
+            self.message_manager.get_messages_by_ids_async(message_ids=self.agent_state.message_ids, actor=self.user),
+            self.passage_manager.agent_passage_size_async(actor=self.user, agent_id=self.agent_state.id),
             self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
         )
         in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
@@ -1315,11 +1315,13 @@ class Agent(BaseAgent):
                 core_memory = system_message[core_memory_marker_pos:].strip()
             else:
                 # if no markers found, put everything in system message
+                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
                 system_prompt = system_message
                 external_memory_summary = ""
                 core_memory = ""
         else:
             # if no system message, fall back on agent's system prompt
+            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
             system_prompt = self.agent_state.system
             external_memory_summary = ""
             core_memory = ""
@@ -1411,8 +1413,8 @@ class Agent(BaseAgent):
         # Grab the in-context messages
         # conversion of messages to anthropic dict format, which is passed to the token counter
         (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
-            self.
-            self.passage_manager.
+            self.message_manager.get_messages_by_ids_async(message_ids=self.agent_state.message_ids, actor=self.user),
+            self.passage_manager.agent_passage_size_async(actor=self.user, agent_id=self.agent_state.id),
             self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
         )
         in_context_messages_anthropic = [m.to_anthropic_dict() for m in in_context_messages]
@@ -1435,14 +1437,16 @@ class Agent(BaseAgent):
                 core_memory = system_message[core_memory_marker_pos:].strip()
             else:
                 # if no markers found, put everything in system message
+                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
                 system_prompt = system_message
-                external_memory_summary =
-                core_memory =
+                external_memory_summary = ""
+                core_memory = ""
         else:
             # if no system message, fall back on agent's system prompt
+            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
             system_prompt = self.agent_state.system
-            external_memory_summary =
-            core_memory =
+            external_memory_summary = ""
+            core_memory = ""

         num_tokens_system_coroutine = anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": system_prompt}])
         num_tokens_core_memory_coroutine = (
letta/agents/base_agent.py
CHANGED
@@ -104,7 +104,7 @@ class BaseAgent(ABC):
         if num_messages is None:
             num_messages = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
         if num_archival_memories is None:
-            num_archival_memories = await self.passage_manager.
+            num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)

         new_system_message_str = compile_system_message(
             system_prompt=agent_state.system,
letta/agents/helpers.py
CHANGED
@@ -1,8 +1,9 @@
 import uuid
 import xml.etree.ElementTree as ET
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 from letta.schemas.agent import AgentState
+from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.usage import LettaUsageStatistics
@@ -12,16 +13,26 @@ from letta.services.message_manager import MessageManager


 def _create_letta_response(
-    new_in_context_messages: list[Message],
+    new_in_context_messages: list[Message],
+    use_assistant_message: bool,
+    usage: LettaUsageStatistics,
+    include_return_message_types: Optional[List[MessageType]] = None,
 ) -> LettaResponse:
     """
     Converts the newly created/persisted messages into a LettaResponse.
     """
     # NOTE: hacky solution to avoid returning heartbeat messages and the original user message
     filter_user_messages = [m for m in new_in_context_messages if m.role != "user"]
+
+    # Convert to Letta messages first
     response_messages = Message.to_letta_messages_from_list(
         messages=filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
     )
+
+    # Apply message type filtering if specified
+    if include_return_message_types is not None:
+        response_messages = [msg for msg in response_messages if msg.message_type in include_return_message_types]
+
     return LettaResponse(messages=response_messages, usage=usage)

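Note: the new `include_return_message_types` parameter lets callers restrict which message types come back in a LettaResponse; `None` keeps the previous return-everything behavior. A self-contained sketch of the same filtering logic (the `Msg` stand-in type is hypothetical, for illustration only):

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Msg:
    # Stand-in for letta's message union; only the field the filter inspects.
    message_type: str
    text: str

def filter_by_type(messages: List[Msg], include: Optional[List[str]] = None) -> List[Msg]:
    if include is None:
        return messages  # None means no filtering, matching the new default
    return [m for m in messages if m.message_type in include]

msgs = [Msg("reasoning_message", "thinking..."), Msg("assistant_message", "Hello!")]
print([m.text for m in filter_by_type(msgs, include=["assistant_message"])])  # ['Hello!']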
letta/agents/letta_agent.py
CHANGED
@@ -14,9 +14,9 @@ from letta.agents.helpers import (
     _prepare_in_context_messages_no_persist_async,
     generate_step_id,
 )
-from letta.errors import
+from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,8 +25,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.orm.enums import ToolType
+from letta.otel.context import get_ctx_attributes
+from letta.otel.metric_registry import MetricRegistry
+from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole, MessageStreamStatus
+from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.llm_config import LLMConfig
@@ -48,7 +52,7 @@ from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.settings import model_settings
 from letta.system import package_function_response
-from letta.
+from letta.types import JsonDict
 from letta.utils import log_telemetry, validate_function_response

 logger = get_logger(__name__)
@@ -118,6 +122,7 @@ class LettaAgent(BaseAgent):
         max_steps: int = 10,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
+        include_return_message_types: Optional[List[MessageType]] = None,
     ) -> LettaResponse:
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -129,7 +134,10 @@ class LettaAgent(BaseAgent):
             request_start_timestamp_ns=request_start_timestamp_ns,
         )
         return _create_letta_response(
-            new_in_context_messages=new_in_context_messages,
+            new_in_context_messages=new_in_context_messages,
+            use_assistant_message=use_assistant_message,
+            usage=usage,
+            include_return_message_types=include_return_message_types,
         )

     @trace_method
@@ -139,6 +147,7 @@ class LettaAgent(BaseAgent):
         max_steps: int = 10,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
+        include_return_message_types: Optional[List[MessageType]] = None,
     ):
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -178,7 +187,7 @@ class LettaAgent(BaseAgent):
             # log llm request time
             now = get_utc_timestamp_ns()
             llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})

             response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

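Note: `ns_to_ms` is a new helper in `letta.helpers.datetime_helpers` whose body is not shown in this diff. Judging from the `provider_req_start_ns // 1_000_000` expression it replaces later in this file, a minimal sketch would be:

def ns_to_ms(ns: int) -> int:
    # Convert a nanosecond duration to whole milliseconds (1 ms = 1_000_000 ns).
    return ns // 1_000_000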
@@ -210,7 +219,7 @@ class LettaAgent(BaseAgent):
             # log LLM request time
             now = get_utc_timestamp_ns()
             llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})

             persisted_messages, should_continue = await self._handle_ai_response(
                 tool_call,
@@ -227,7 +236,7 @@ class LettaAgent(BaseAgent):
             # log step time
             now = get_utc_timestamp_ns()
             step_ns = now - step_start
-            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
             agent_step_span.end()

             # Log LLM Trace
@@ -247,8 +256,12 @@ class LettaAgent(BaseAgent):
             letta_messages = Message.to_letta_messages_from_list(
                 filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
             )
+
             for message in letta_messages:
-
+                if not include_return_message_types:
+                    yield f"data: {message.model_dump_json()}\n\n"
+                elif include_return_message_types and message.message_type in include_return_message_types:
+                    yield f"data: {message.model_dump_json()}\n\n"

             if not should_continue:
                 break
@@ -267,7 +280,7 @@ class LettaAgent(BaseAgent):
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
             request_ns = now - request_start_timestamp_ns
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
         request_span.end()

         # Return back usage
@@ -321,7 +334,7 @@ class LettaAgent(BaseAgent):
             # log LLM request time
             now = get_utc_timestamp_ns()
             llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})

             # TODO: add run_id
             usage.step_count += 1
@@ -363,7 +376,7 @@ class LettaAgent(BaseAgent):
             # log step time
             now = get_utc_timestamp_ns()
             step_ns = now - step_start
-            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
             agent_step_span.end()

             # Log LLM Trace
@@ -384,7 +397,7 @@ class LettaAgent(BaseAgent):
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
             request_ns = now - request_start_timestamp_ns
-            request_span.add_event(name="request_ms", attributes={"duration_ms": request_ns
+            request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
         request_span.end()

         # Extend the in context message ids
@@ -406,6 +419,7 @@ class LettaAgent(BaseAgent):
         max_steps: int = 10,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
+        include_return_message_types: Optional[List[MessageType]] = None,
     ) -> AsyncGenerator[str, None]:
         """
         Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
@@ -480,16 +494,24 @@ class LettaAgent(BaseAgent):
                 if first_chunk and request_span is not None:
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - request_start_timestamp_ns
-                    request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns
+                    request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
                     first_chunk = False

-
+                if include_return_message_types is None:
+                    # return all data
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+                elif include_return_message_types and chunk.message_type in include_return_message_types:
+                    # filter down returned data
+                    yield f"data: {chunk.model_dump_json()}\n\n"

             # update usage
             usage.step_count += 1
             usage.completion_tokens += interface.output_tokens
             usage.prompt_tokens += interface.input_tokens
             usage.total_tokens += interface.input_tokens + interface.output_tokens
+            MetricRegistry().message_output_tokens.record(
+                interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+            )

             # Persist input messages if not already
             # Special strategy to lower TTFT
@@ -500,7 +522,7 @@ class LettaAgent(BaseAgent):
             # log LLM request time
             now = get_utc_timestamp_ns()
             llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})

             # Process resulting stream content
             tool_call = interface.get_tool_call_object()
@@ -515,8 +537,7 @@ class LettaAgent(BaseAgent):
                     total_tokens=interface.input_tokens + interface.output_tokens,
                 ),
                 reasoning_content=reasoning_content,
-                pre_computed_assistant_message_id=interface.
-                pre_computed_tool_message_id=interface.letta_tool_message_id,
+                pre_computed_assistant_message_id=interface.letta_message_id,
                 step_id=step_id,
                 agent_step_span=agent_step_span,
             )
@@ -526,7 +547,7 @@ class LettaAgent(BaseAgent):
             # log total step time
             now = get_utc_timestamp_ns()
             step_ns = now - step_start
-            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
             agent_step_span.end()

             # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
@@ -556,9 +577,11 @@ class LettaAgent(BaseAgent):
                 ),
             )

-            if
-
-
+            tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+            if not (use_assistant_message and tool_return.name == "send_message"):
+                # Apply message type filtering if specified
+                if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+                    yield f"data: {tool_return.model_dump_json()}\n\n"

             if not should_continue:
                 break
@@ -577,7 +600,7 @@ class LettaAgent(BaseAgent):
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
             request_ns = now - request_start_timestamp_ns
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
         request_span.end()

         # TODO: Also yield out a letta usage stats SSE
@@ -604,10 +627,16 @@ class LettaAgent(BaseAgent):
         )
         log_event("agent.stream_no_tokens.llm_request.created")

+        async with AsyncTimer() as timer:
+            response = await llm_client.request_async(request_data, agent_state.llm_config)
+        MetricRegistry().llm_execution_time_ms_histogram.record(
+            timer.elapsed_ms,
+            dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+        )
         # Attempt LLM request
         return (
             request_data,
-
+            response,
             current_in_context_messages,
             new_in_context_messages,
         )
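Note: `AsyncTimer`, also newly imported from `letta.helpers.datetime_helpers`, only appears at its call site above (`async with AsyncTimer() as timer:` followed by a read of `timer.elapsed_ms`). A plausible sketch matching that usage, assuming a monotonic-clock implementation:

import time

class AsyncTimer:
    # Async context manager that records elapsed wall time in milliseconds.
    async def __aenter__(self):
        self._start_ns = time.perf_counter_ns()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        self.elapsed_ms = (time.perf_counter_ns() - self._start_ns) / 1_000_000
        return False  # never suppress exceptions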
@@ -654,9 +683,7 @@ class LettaAgent(BaseAgent):
             if first_chunk and ttft_span is not None:
                 provider_request_start_timestamp_ns = get_utc_timestamp_ns()
                 provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
-                ttft_span.add_event(
-                    name="provider_req_start_ns", attributes={"provider_req_start_ms": provider_req_start_ns // 1_000_000}
-                )
+                ttft_span.add_event(name="provider_req_start_ns", attributes={"provider_req_start_ms": ns_to_ms(provider_req_start_ns)})

         # Attempt LLM request
         return (
@@ -692,7 +719,7 @@ class LettaAgent(BaseAgent):
         llm_config: LLMConfig,
         force: bool,
     ) -> List[Message]:
-        if isinstance(e,
+        if isinstance(e, ContextWindowExceededError):
             return await self._rebuild_context_window(
                 in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
             )
@@ -754,7 +781,7 @@ class LettaAgent(BaseAgent):
                 else asyncio.sleep(0, result=self.num_messages)
             ),
             (
-                self.passage_manager.
+                self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
                 if self.num_archival_memories is None
                 else asyncio.sleep(0, result=self.num_archival_memories)
             ),
@@ -775,6 +802,7 @@ class LettaAgent(BaseAgent):
             ToolType.LETTA_SLEEPTIME_CORE,
             ToolType.LETTA_VOICE_SLEEPTIME_CORE,
             ToolType.LETTA_BUILTIN,
+            ToolType.LETTA_FILES_CORE,
             ToolType.EXTERNAL_COMPOSIO,
             ToolType.EXTERNAL_MCP,
         }
@@ -810,7 +838,6 @@ class LettaAgent(BaseAgent):
         usage: UsageStatistics,
         reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
         pre_computed_assistant_message_id: Optional[str] = None,
-        pre_computed_tool_message_id: Optional[str] = None,
         step_id: str | None = None,
         new_in_context_messages: Optional[List[Message]] = None,
         agent_step_span: Optional["Span"] = None,
@@ -822,6 +849,9 @@ class LettaAgent(BaseAgent):
         """
         tool_call_name = tool_call.function.name
         tool_call_args_str = tool_call.function.arguments
+        # Temp hack to gracefully handle parallel tool calling attempt, only take first one
+        if "}{" in tool_call_args_str:
+            tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"

         try:
             tool_args = json.loads(tool_call_args_str)
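Note: the parallel-tool-call guard above works because a model that attempts two tool calls at once emits the argument objects as concatenated JSON; splitting on the first `}{` boundary and restoring the closing brace keeps only the first call. A worked example:

args = '{"message": "hi"}{"message": "bye"}'
if "}{" in args:
    args = args.split("}{", 1)[0] + "}"
print(args)  # {"message": "hi"}

As the in-code comment admits, this is a heuristic: a literal `}{` inside a string argument of the first call would be split incorrectly.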
@@ -859,6 +889,7 @@ class LettaAgent(BaseAgent):
             tool_args=tool_args,
             agent_state=agent_state,
             agent_step_span=agent_step_span,
+            step_id=step_id,
         )
         log_telemetry(
             self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
@@ -926,7 +957,6 @@ class LettaAgent(BaseAgent):
             add_heartbeat_request_system_message=continue_stepping,
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-            pre_computed_tool_message_id=pre_computed_tool_message_id,
             step_id=logged_step.id if logged_step else None,  # TODO (cliandy): eventually move over other agent loops
         )

@@ -937,10 +967,15 @@ class LettaAgent(BaseAgent):

     @trace_method
     async def _execute_tool(
-        self,
+        self,
+        tool_name: str,
+        tool_args: JsonDict,
+        agent_state: AgentState,
+        agent_step_span: Optional["Span"] = None,
+        step_id: str | None = None,
     ) -> "ToolExecutionResult":
         """
-        Executes a tool and returns
+        Executes a tool and returns the ToolExecutionResult.
         """
         from letta.schemas.tool_execution_result import ToolExecutionResult

@@ -972,7 +1007,10 @@ class LettaAgent(BaseAgent):
         # TODO: Integrate sandbox result
         log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
         tool_execution_result = await tool_execution_manager.execute_tool_async(
-            function_name=tool_name,
+            function_name=tool_name,
+            function_args=tool_args,
+            tool=target_tool,
+            step_id=step_id,
         )
         if agent_step_span:
             end_time = get_utc_timestamp_ns()
@@ -980,7 +1018,7 @@ class LettaAgent(BaseAgent):
             name="tool_execution_completed",
             attributes={
                 "tool_name": target_tool.name,
-                "duration_ms": (end_time - start_time)
+                "duration_ms": ns_to_ms((end_time - start_time)),
                 "success": tool_execution_result.success_flag,
                 "tool_type": target_tool.tool_type,
                 "tool_id": target_tool.id,
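Note: the streaming paths in this file all frame their output as server-sent events: each message is serialized to JSON and wrapped in a `data:` line terminated by a blank line, with a literal `data: [DONE]` event closing the stream. A minimal sketch of that framing, independent of letta's message types:

import json

def sse_event(payload: dict) -> str:
    # One SSE event: a "data:" line followed by a blank line.
    return f"data: {json.dumps(payload)}\n\n"

print(sse_event({"message_type": "assistant_message", "content": "Hello!"}), end="")
print("data: [DONE]\n\n", end="")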
letta/agents/letta_agent_batch.py
CHANGED
@@ -16,6 +16,7 @@ from letta.llm_api.llm_client import LLMClient
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.orm.enums import ToolType
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState, AgentStepState
 from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
 from letta.schemas.job import JobUpdate
@@ -39,7 +40,6 @@ from letta.services.passage_manager import PassageManager
 from letta.services.sandbox_config_manager import SandboxConfigManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.settings import tool_settings
-from letta.tracing import log_event, trace_method

 logger = get_logger(__name__)

@@ -551,7 +551,6 @@ class LettaAgentBatch(BaseAgent):
             add_heartbeat_request_system_message=False,
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=None,
-            pre_computed_tool_message_id=None,
             llm_batch_item_id=llm_batch_item_id,
         )

letta/agents/voice_agent.py
CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import uuid
 from datetime import datetime, timedelta, timezone
@@ -81,8 +82,8 @@ class VoiceAgent(BaseAgent):
         self.summary_block_label = "human"

         # Cached archival memory/message size
-        self.num_messages =
-        self.num_archival_memories =
+        self.num_messages = None
+        self.num_archival_memories = None

     def init_summarizer(self, agent_state: AgentState) -> Summarizer:
         if not agent_state.multi_agent_group:
@@ -118,13 +119,12 @@ class VoiceAgent(BaseAgent):
         Main streaming loop that yields partial tokens.
         Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        print("CALL STREAM")
         if len(input_messages) != 1 or input_messages[0].role != MessageRole.user:
             raise ValueError(f"Voice Agent was invoked with multiple input messages or message did not have role `user`: {input_messages}")

         user_query = input_messages[0].content[0].text

-        agent_state = self.agent_manager.
+        agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)

         # TODO: Refactor this so it uses our in-house clients
         # TODO: For now, piggyback off of OpenAI client for ease
@@ -140,7 +140,7 @@ class VoiceAgent(BaseAgent):

         summarizer = self.init_summarizer(agent_state=agent_state)

-        in_context_messages = self.message_manager.
+        in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
         memory_edit_timestamp = get_utc_time()
         in_context_messages[0].content[0].text = compile_system_message(
             system_prompt=agent_state.system,
@@ -183,10 +183,6 @@ class VoiceAgent(BaseAgent):
         # Rebuild context window if desired
         await self._rebuild_context_window(summarizer, in_context_messages, letta_message_db_queue)

-        # TODO: This may be out of sync, if in between steps users add files
-        self.num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
-        self.num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
-
         yield "data: [DONE]\n\n"

     async def _handle_ai_response(
@@ -286,14 +282,14 @@ class VoiceAgent(BaseAgent):
     async def _rebuild_context_window(
         self, summarizer: Summarizer, in_context_messages: List[Message], letta_message_db_queue: List[Message]
     ) -> None:
-        new_letta_messages = self.message_manager.
+        new_letta_messages = await self.message_manager.create_many_messages_async(letta_message_db_queue, actor=self.actor)

         # TODO: Make this more general and configurable, less brittle
         new_in_context_messages, updated = summarizer.summarize(
             in_context_messages=in_context_messages, new_letta_messages=new_letta_messages
         )

-        self.agent_manager.
+        await self.agent_manager.set_in_context_messages_async(
             agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
         )

@@ -301,9 +297,19 @@ class VoiceAgent(BaseAgent):
         self,
         in_context_messages: List[Message],
         agent_state: AgentState,
-        num_messages: int | None = None,
-        num_archival_memories: int | None = None,
     ) -> List[Message]:
+        self.num_messages, self.num_archival_memories = await asyncio.gather(
+            (
+                self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+                if self.num_messages is None
+                else asyncio.sleep(0, result=self.num_messages)
+            ),
+            (
+                self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
+                if self.num_archival_memories is None
+                else asyncio.sleep(0, result=self.num_archival_memories)
+            ),
+        )
         return await super()._rebuild_memory_async(
             in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
        )
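Note: the rewritten `_rebuild_memory` relies on the same trick LettaAgent uses for its cached counts: when a value is already cached, `asyncio.sleep(0, result=...)` serves as an awaitable that immediately resolves to the cached value, so both branches of each conditional can be passed to `asyncio.gather` uniformly. A self-contained sketch of the pattern (the `fetch_count` coroutine is hypothetical):

import asyncio
from typing import Optional

async def fetch_count() -> int:
    # Hypothetical stand-in for an expensive DB size query.
    await asyncio.sleep(0.01)
    return 42

async def rebuild(cached: Optional[int]) -> int:
    # Wrap the cached value in an immediately-resolving awaitable so that
    # gather() treats the cache-hit and cache-miss cases identically.
    (count,) = await asyncio.gather(
        fetch_count() if cached is None else asyncio.sleep(0, result=cached)
    )
    return count

print(asyncio.run(rebuild(None)))  # 42, via the query
print(asyncio.run(rebuild(7)))     # 7, via the cache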