letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +16 -12
  3. letta/agents/base_agent.py +1 -1
  4. letta/agents/helpers.py +13 -2
  5. letta/agents/letta_agent.py +72 -34
  6. letta/agents/letta_agent_batch.py +1 -2
  7. letta/agents/voice_agent.py +19 -13
  8. letta/agents/voice_sleeptime_agent.py +23 -6
  9. letta/constants.py +18 -0
  10. letta/data_sources/__init__.py +0 -0
  11. letta/data_sources/redis_client.py +282 -0
  12. letta/errors.py +0 -4
  13. letta/functions/function_sets/files.py +58 -0
  14. letta/functions/schema_generator.py +18 -1
  15. letta/groups/sleeptime_multi_agent_v2.py +13 -3
  16. letta/helpers/datetime_helpers.py +47 -3
  17. letta/helpers/decorators.py +69 -0
  18. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  19. letta/interfaces/anthropic_streaming_interface.py +43 -24
  20. letta/interfaces/openai_streaming_interface.py +21 -19
  21. letta/llm_api/anthropic.py +1 -1
  22. letta/llm_api/anthropic_client.py +30 -16
  23. letta/llm_api/google_vertex_client.py +1 -1
  24. letta/llm_api/helpers.py +36 -30
  25. letta/llm_api/llm_api_tools.py +1 -1
  26. letta/llm_api/llm_client_base.py +29 -1
  27. letta/llm_api/openai.py +1 -1
  28. letta/llm_api/openai_client.py +6 -8
  29. letta/local_llm/chat_completion_proxy.py +1 -1
  30. letta/memory.py +1 -1
  31. letta/orm/enums.py +1 -0
  32. letta/orm/file.py +80 -3
  33. letta/orm/files_agents.py +13 -0
  34. letta/orm/passage.py +2 -0
  35. letta/orm/sqlalchemy_base.py +34 -11
  36. letta/otel/__init__.py +0 -0
  37. letta/otel/context.py +25 -0
  38. letta/otel/events.py +0 -0
  39. letta/otel/metric_registry.py +122 -0
  40. letta/otel/metrics.py +66 -0
  41. letta/otel/resource.py +26 -0
  42. letta/{tracing.py → otel/tracing.py} +55 -78
  43. letta/plugins/README.md +22 -0
  44. letta/plugins/__init__.py +0 -0
  45. letta/plugins/defaults.py +11 -0
  46. letta/plugins/plugins.py +72 -0
  47. letta/schemas/enums.py +8 -0
  48. letta/schemas/file.py +12 -0
  49. letta/schemas/letta_request.py +6 -0
  50. letta/schemas/passage.py +1 -0
  51. letta/schemas/tool.py +4 -0
  52. letta/server/db.py +7 -7
  53. letta/server/rest_api/app.py +8 -6
  54. letta/server/rest_api/routers/v1/agents.py +46 -37
  55. letta/server/rest_api/routers/v1/groups.py +3 -3
  56. letta/server/rest_api/routers/v1/sources.py +26 -3
  57. letta/server/rest_api/routers/v1/tools.py +7 -2
  58. letta/server/rest_api/utils.py +9 -6
  59. letta/server/server.py +25 -13
  60. letta/services/agent_manager.py +186 -194
  61. letta/services/block_manager.py +1 -1
  62. letta/services/context_window_calculator/context_window_calculator.py +1 -1
  63. letta/services/context_window_calculator/token_counter.py +3 -2
  64. letta/services/file_processor/chunker/line_chunker.py +34 -0
  65. letta/services/file_processor/file_processor.py +43 -12
  66. letta/services/file_processor/parser/mistral_parser.py +11 -1
  67. letta/services/files_agents_manager.py +96 -7
  68. letta/services/group_manager.py +6 -6
  69. letta/services/helpers/agent_manager_helper.py +404 -3
  70. letta/services/identity_manager.py +1 -1
  71. letta/services/job_manager.py +1 -1
  72. letta/services/llm_batch_manager.py +1 -1
  73. letta/services/mcp/stdio_client.py +5 -1
  74. letta/services/mcp_manager.py +4 -4
  75. letta/services/message_manager.py +1 -1
  76. letta/services/organization_manager.py +1 -1
  77. letta/services/passage_manager.py +604 -19
  78. letta/services/per_agent_lock_manager.py +1 -1
  79. letta/services/provider_manager.py +1 -1
  80. letta/services/sandbox_config_manager.py +1 -1
  81. letta/services/source_manager.py +178 -19
  82. letta/services/step_manager.py +2 -2
  83. letta/services/summarizer/summarizer.py +1 -1
  84. letta/services/telemetry_manager.py +1 -1
  85. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  86. letta/services/tool_executor/composio_tool_executor.py +53 -0
  87. letta/services/tool_executor/core_tool_executor.py +474 -0
  88. letta/services/tool_executor/files_tool_executor.py +138 -0
  89. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  90. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  91. letta/services/tool_executor/tool_execution_manager.py +34 -14
  92. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  93. letta/services/tool_executor/tool_executor.py +3 -802
  94. letta/services/tool_executor/tool_executor_base.py +43 -0
  95. letta/services/tool_manager.py +55 -59
  96. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  97. letta/services/tool_sandbox/local_sandbox.py +6 -3
  98. letta/services/user_manager.py +6 -3
  99. letta/settings.py +23 -2
  100. letta/utils.py +7 -2
  101. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
  102. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
  103. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
  104. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
  105. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,6 +1,6 @@
  import os
 
- __version__ = "0.8.0"
+ __version__ = "0.8.3"
 
  if os.environ.get("LETTA_VERSION"):
  __version__ = os.environ["LETTA_VERSION"]
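Note that the override in the unchanged context only takes effect if the environment variable is set before letta is first imported. A minimal sketch of the behavior shown above:

    import os

    os.environ["LETTA_VERSION"] = "0.9.9-test"  # must be set before the first `import letta`
    import letta

    print(letta.__version__)  # "0.9.9-test" instead of the packaged "0.8.3"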
letta/agent.py CHANGED
@@ -41,6 +41,7 @@ from letta.log import get_logger
  from letta.memory import summarize_messages
  from letta.orm import User
  from letta.orm.enums import ToolType
+ from letta.otel.tracing import log_event, trace_method
  from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
  from letta.schemas.block import BlockUpdate
  from letta.schemas.embedding_config import EmbeddingConfig
@@ -69,10 +70,9 @@ from letta.services.step_manager import StepManager
  from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
  from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
  from letta.services.tool_manager import ToolManager
- from letta.settings import settings, summarizer_settings, model_settings
+ from letta.settings import settings, summarizer_settings
  from letta.streaming_interface import StreamingRefreshCLIInterface
  from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
- from letta.tracing import log_event, trace_method
  from letta.utils import count_tokens, get_friendly_error_msg, get_tool_call_id, log_telemetry, parse_json, validate_function_response
 
  logger = get_logger(__name__)
@@ -503,7 +503,7 @@ class Agent(BaseAgent):
  response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
  )
  function_name = function_call.name
- self.logger.debug(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
+ self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
 
  # Failure case 1: function name is wrong (not in agent_state.tools)
  target_letta_tool = None
@@ -1282,7 +1282,7 @@ class Agent(BaseAgent):
  )
 
  async def get_context_window_async(self) -> ContextWindowOverview:
- if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION" and model_settings.anthropic_api_key is not None:
+ if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION":
  return await self.get_context_window_from_anthropic_async()
  return await self.get_context_window_from_tiktoken_async()
 
@@ -1291,8 +1291,8 @@ class Agent(BaseAgent):
  # Grab the in-context messages
  # conversion of messages to OpenAI dict format, which is passed to the token counter
  (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
- self.agent_manager.get_in_context_messages_async(agent_id=self.agent_state.id, actor=self.user),
- self.passage_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+ self.message_manager.get_messages_by_ids_async(message_ids=self.agent_state.message_ids, actor=self.user),
+ self.passage_manager.agent_passage_size_async(actor=self.user, agent_id=self.agent_state.id),
  self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
  )
  in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
@@ -1315,11 +1315,13 @@ class Agent(BaseAgent):
  core_memory = system_message[core_memory_marker_pos:].strip()
  else:
  # if no markers found, put everything in system message
+ self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
  system_prompt = system_message
  external_memory_summary = ""
  core_memory = ""
  else:
  # if no system message, fall back on agent's system prompt
+ self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
  system_prompt = self.agent_state.system
  external_memory_summary = ""
  core_memory = ""
@@ -1411,8 +1413,8 @@ class Agent(BaseAgent):
  # Grab the in-context messages
  # conversion of messages to anthropic dict format, which is passed to the token counter
  (in_context_messages, passage_manager_size, message_manager_size) = await asyncio.gather(
- self.agent_manager.get_in_context_messages_async(agent_id=self.agent_state.id, actor=self.user),
- self.passage_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
+ self.message_manager.get_messages_by_ids_async(message_ids=self.agent_state.message_ids, actor=self.user),
+ self.passage_manager.agent_passage_size_async(actor=self.user, agent_id=self.agent_state.id),
  self.message_manager.size_async(actor=self.user, agent_id=self.agent_state.id),
  )
  in_context_messages_anthropic = [m.to_anthropic_dict() for m in in_context_messages]
@@ -1435,14 +1437,16 @@ class Agent(BaseAgent):
  core_memory = system_message[core_memory_marker_pos:].strip()
  else:
  # if no markers found, put everything in system message
+ self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
  system_prompt = system_message
- external_memory_summary = None
- core_memory = None
+ external_memory_summary = ""
+ core_memory = ""
  else:
  # if no system message, fall back on agent's system prompt
+ self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
  system_prompt = self.agent_state.system
- external_memory_summary = None
- core_memory = None
+ external_memory_summary = ""
+ core_memory = ""
 
  num_tokens_system_coroutine = anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": system_prompt}])
  num_tokens_core_memory_coroutine = (
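The context-window calculation above now loads the in-context messages straight from the message manager (by the agent's message_ids), uses the renamed agent_passage_size_async, and gathers all three lookups concurrently; the empty-string fallbacks also keep the Anthropic token counter from ever receiving None. A minimal standalone sketch of the gather pattern, using hypothetical stand-in coroutines rather than the real letta managers:

    import asyncio

    async def get_messages_by_ids(ids):
        return [f"msg-{i}" for i in ids]  # stands in for message_manager.get_messages_by_ids_async

    async def agent_passage_size():
        return 12  # stands in for passage_manager.agent_passage_size_async

    async def message_count():
        return 340  # stands in for message_manager.size_async

    async def main():
        # Three independent lookups issued concurrently, awaited once
        messages, passages, total = await asyncio.gather(
            get_messages_by_ids([1, 2, 3]), agent_passage_size(), message_count()
        )
        print(len(messages), passages, total)  # 3 12 340

    asyncio.run(main())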
letta/agents/base_agent.py CHANGED
@@ -104,7 +104,7 @@ class BaseAgent(ABC):
  if num_messages is None:
  num_messages = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
  if num_archival_memories is None:
- num_archival_memories = await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
 
  new_system_message_str = compile_system_message(
  system_prompt=agent_state.system,
letta/agents/helpers.py CHANGED
@@ -1,8 +1,9 @@
  import uuid
  import xml.etree.ElementTree as ET
- from typing import List, Tuple
+ from typing import List, Optional, Tuple
 
  from letta.schemas.agent import AgentState
+ from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.usage import LettaUsageStatistics
@@ -12,16 +13,26 @@ from letta.services.message_manager import MessageManager
 
 
  def _create_letta_response(
- new_in_context_messages: list[Message], use_assistant_message: bool, usage: LettaUsageStatistics
+ new_in_context_messages: list[Message],
+ use_assistant_message: bool,
+ usage: LettaUsageStatistics,
+ include_return_message_types: Optional[List[MessageType]] = None,
  ) -> LettaResponse:
  """
  Converts the newly created/persisted messages into a LettaResponse.
  """
  # NOTE: hacky solution to avoid returning heartbeat messages and the original user message
  filter_user_messages = [m for m in new_in_context_messages if m.role != "user"]
+
+ # Convert to Letta messages first
  response_messages = Message.to_letta_messages_from_list(
  messages=filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
  )
+
+ # Apply message type filtering if specified
+ if include_return_message_types is not None:
+ response_messages = [msg for msg in response_messages if msg.message_type in include_return_message_types]
+
  return LettaResponse(messages=response_messages, usage=usage)
 
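The new include_return_message_types parameter lets callers restrict which message types a LettaResponse carries; it presumably originates in the request schema (letta/schemas/letta_request.py, +6 -0 in this diff) and is threaded down to this helper. A usage sketch; the entry-point name and the "assistant_message" type value are assumptions for illustration:

    # Hypothetical call: only assistant replies survive the filter
    response = await agent.step(
        input_messages=messages,
        include_return_message_types=["assistant_message"],
    )
    # Internally the filter is just a list comprehension over the converted messages:
    # [msg for msg in response_messages if msg.message_type in include_return_message_types]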
letta/agents/letta_agent.py CHANGED
@@ -14,9 +14,9 @@ from letta.agents.helpers import (
  _prepare_in_context_messages_no_persist_async,
  generate_step_id,
  )
- from letta.errors import LLMContextWindowExceededError
+ from letta.errors import ContextWindowExceededError
  from letta.helpers import ToolRulesSolver
- from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+ from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
  from letta.helpers.tool_execution_helper import enable_strict_mode
  from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
  from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,8 +25,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG
  from letta.log import get_logger
  from letta.orm.enums import ToolType
+ from letta.otel.context import get_ctx_attributes
+ from letta.otel.metric_registry import MetricRegistry
+ from letta.otel.tracing import log_event, trace_method, tracer
  from letta.schemas.agent import AgentState
  from letta.schemas.enums import MessageRole, MessageStreamStatus
+ from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.llm_config import LLMConfig
@@ -48,7 +52,7 @@ from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryMana
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
  from letta.settings import model_settings
  from letta.system import package_function_response
- from letta.tracing import log_event, trace_method, tracer
+ from letta.types import JsonDict
  from letta.utils import log_telemetry, validate_function_response
 
  logger = get_logger(__name__)
@@ -118,6 +122,7 @@ class LettaAgent(BaseAgent):
  max_steps: int = 10,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: Optional[int] = None,
+ include_return_message_types: Optional[List[MessageType]] = None,
  ) -> LettaResponse:
  agent_state = await self.agent_manager.get_agent_by_id_async(
  agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -129,7 +134,10 @@ class LettaAgent(BaseAgent):
  request_start_timestamp_ns=request_start_timestamp_ns,
  )
  return _create_letta_response(
- new_in_context_messages=new_in_context_messages, use_assistant_message=use_assistant_message, usage=usage
+ new_in_context_messages=new_in_context_messages,
+ use_assistant_message=use_assistant_message,
+ usage=usage,
+ include_return_message_types=include_return_message_types,
  )
 
  @trace_method
@@ -139,6 +147,7 @@
  max_steps: int = 10,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: Optional[int] = None,
+ include_return_message_types: Optional[List[MessageType]] = None,
  ):
  agent_state = await self.agent_manager.get_agent_by_id_async(
  agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -178,7 +187,7 @@
  # log llm request time
  now = get_utc_timestamp_ns()
  llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
 
  response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
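Every inline `// 1_000_000` conversion in this file now goes through ns_to_ms, imported from letta.helpers.datetime_helpers (+47 -3 in this diff). Judging only from the call sites it replaces, the helper presumably reduces to something like:

    def ns_to_ms(ns: int) -> int:
        # Nanoseconds -> whole milliseconds, matching the inline `ns // 1_000_000`
        # expressions this diff replaces (the real helper may round differently).
        return ns // 1_000_000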
@@ -210,7 +219,7 @@
  # log LLM request time
  now = get_utc_timestamp_ns()
  llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
 
  persisted_messages, should_continue = await self._handle_ai_response(
  tool_call,
@@ -227,7 +236,7 @@
  # log step time
  now = get_utc_timestamp_ns()
  step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
  agent_step_span.end()
 
  # Log LLM Trace
@@ -247,8 +256,12 @@
  letta_messages = Message.to_letta_messages_from_list(
  filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
  )
+
  for message in letta_messages:
- yield f"data: {message.model_dump_json()}\n\n"
+ if not include_return_message_types:
+ yield f"data: {message.model_dump_json()}\n\n"
+ elif include_return_message_types and message.message_type in include_return_message_types:
+ yield f"data: {message.model_dump_json()}\n\n"
 
  if not should_continue:
  break
@@ -267,7 +280,7 @@
  if request_start_timestamp_ns:
  now = get_utc_timestamp_ns()
  request_ns = now - request_start_timestamp_ns
- request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
  request_span.end()
 
  # Return back usage
@@ -321,7 +334,7 @@
  # log LLM request time
  now = get_utc_timestamp_ns()
  llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
 
  # TODO: add run_id
  usage.step_count += 1
@@ -363,7 +376,7 @@
  # log step time
  now = get_utc_timestamp_ns()
  step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
  agent_step_span.end()
 
  # Log LLM Trace
@@ -384,7 +397,7 @@
  if request_start_timestamp_ns:
  now = get_utc_timestamp_ns()
  request_ns = now - request_start_timestamp_ns
- request_span.add_event(name="request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
  request_span.end()
 
  # Extend the in context message ids
@@ -406,6 +419,7 @@
  max_steps: int = 10,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: Optional[int] = None,
+ include_return_message_types: Optional[List[MessageType]] = None,
  ) -> AsyncGenerator[str, None]:
  """
  Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
@@ -480,16 +494,24 @@
  if first_chunk and request_span is not None:
  now = get_utc_timestamp_ns()
  ttft_ns = now - request_start_timestamp_ns
- request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+ request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
  first_chunk = False
 
- yield f"data: {chunk.model_dump_json()}\n\n"
+ if include_return_message_types is None:
+ # return all data
+ yield f"data: {chunk.model_dump_json()}\n\n"
+ elif include_return_message_types and chunk.message_type in include_return_message_types:
+ # filter down returned data
+ yield f"data: {chunk.model_dump_json()}\n\n"
 
  # update usage
  usage.step_count += 1
  usage.completion_tokens += interface.output_tokens
  usage.prompt_tokens += interface.input_tokens
  usage.total_tokens += interface.input_tokens + interface.output_tokens
+ MetricRegistry().message_output_tokens.record(
+ interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )
 
  # Persist input messages if not already
  # Special strategy to lower TTFT
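The two yield branches above are equivalent to the single guard this diff uses later for tool returns, shown condensed below. One inconsistency worth flagging: the non-streaming loop earlier (`if not include_return_message_types:`) treats an empty list as "no filter" and yields everything, while this branch pair yields nothing for an empty list.

    # Condensed form with identical behavior to the two branches above
    if include_return_message_types is None or chunk.message_type in include_return_message_types:
        yield f"data: {chunk.model_dump_json()}\n\n"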
@@ -500,7 +522,7 @@
  # log LLM request time
  now = get_utc_timestamp_ns()
  llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
 
  # Process resulting stream content
  tool_call = interface.get_tool_call_object()
@@ -515,8 +537,7 @@
  total_tokens=interface.input_tokens + interface.output_tokens,
  ),
  reasoning_content=reasoning_content,
- pre_computed_assistant_message_id=interface.letta_assistant_message_id,
- pre_computed_tool_message_id=interface.letta_tool_message_id,
+ pre_computed_assistant_message_id=interface.letta_message_id,
  step_id=step_id,
  agent_step_span=agent_step_span,
  )
@@ -526,7 +547,7 @@
  # log total step time
  now = get_utc_timestamp_ns()
  step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
  agent_step_span.end()
 
  # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
@@ -556,9 +577,11 @@
  ),
  )
 
- if not use_assistant_message or should_continue:
- tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
- yield f"data: {tool_return.model_dump_json()}\n\n"
+ tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+ if not (use_assistant_message and tool_return.name == "send_message"):
+ # Apply message type filtering if specified
+ if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+ yield f"data: {tool_return.model_dump_json()}\n\n"
 
  if not should_continue:
  break
@@ -577,7 +600,7 @@
  if request_start_timestamp_ns:
  now = get_utc_timestamp_ns()
  request_ns = now - request_start_timestamp_ns
- request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
  request_span.end()
 
  # TODO: Also yield out a letta usage stats SSE
@@ -604,10 +627,16 @@
  )
  log_event("agent.stream_no_tokens.llm_request.created")
 
+ async with AsyncTimer() as timer:
+ response = await llm_client.request_async(request_data, agent_state.llm_config)
+ MetricRegistry().llm_execution_time_ms_histogram.record(
+ timer.elapsed_ms,
+ dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+ )
  # Attempt LLM request
  return (
  request_data,
- await llm_client.request_async(request_data, agent_state.llm_config),
+ response,
  current_in_context_messages,
  new_in_context_messages,
  )
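The LLM call is now timed with AsyncTimer so its duration can feed the new llm_execution_time_ms_histogram in MetricRegistry (letta/otel/metric_registry.py, +122 -0 in this diff). The timer itself is not shown here; a minimal sketch consistent with the `async with AsyncTimer() as timer` / `timer.elapsed_ms` call site, assuming a perf-counter implementation:

    import time

    class AsyncTimer:
        # Async context manager measuring elapsed wall-clock time in milliseconds.
        async def __aenter__(self):
            self._start_ns = time.perf_counter_ns()
            return self

        async def __aexit__(self, exc_type, exc, tb):
            self.elapsed_ms = (time.perf_counter_ns() - self._start_ns) / 1_000_000
            return False  # propagate any exception from the timed block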
@@ -654,9 +683,7 @@
  if first_chunk and ttft_span is not None:
  provider_request_start_timestamp_ns = get_utc_timestamp_ns()
  provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
- ttft_span.add_event(
- name="provider_req_start_ns", attributes={"provider_req_start_ms": provider_req_start_ns // 1_000_000}
- )
+ ttft_span.add_event(name="provider_req_start_ns", attributes={"provider_req_start_ms": ns_to_ms(provider_req_start_ns)})
 
  # Attempt LLM request
  return (
@@ -692,7 +719,7 @@
  llm_config: LLMConfig,
  force: bool,
  ) -> List[Message]:
- if isinstance(e, LLMContextWindowExceededError):
+ if isinstance(e, ContextWindowExceededError):
  return await self._rebuild_context_window(
  in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
  )
@@ -754,7 +781,7 @@
  else asyncio.sleep(0, result=self.num_messages)
  ),
  (
- self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
  if self.num_archival_memories is None
  else asyncio.sleep(0, result=self.num_archival_memories)
  ),
@@ -775,6 +802,7 @@
  ToolType.LETTA_SLEEPTIME_CORE,
  ToolType.LETTA_VOICE_SLEEPTIME_CORE,
  ToolType.LETTA_BUILTIN,
+ ToolType.LETTA_FILES_CORE,
  ToolType.EXTERNAL_COMPOSIO,
  ToolType.EXTERNAL_MCP,
  }
@@ -810,7 +838,6 @@
  usage: UsageStatistics,
  reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
  pre_computed_assistant_message_id: Optional[str] = None,
- pre_computed_tool_message_id: Optional[str] = None,
  step_id: str | None = None,
  new_in_context_messages: Optional[List[Message]] = None,
  agent_step_span: Optional["Span"] = None,
@@ -822,6 +849,9 @@
  """
  tool_call_name = tool_call.function.name
  tool_call_args_str = tool_call.function.arguments
+ # Temp hack to gracefully handle parallel tool calling attempt, only take first one
+ if "}{" in tool_call_args_str:
+ tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
 
  try:
  tool_args = json.loads(tool_call_args_str)
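The `"}{"` hack above drops everything after the first JSON object when a model emits several argument payloads back to back. A worked example, including the failure mode the "temp hack" comment implies:

    args = '{"city": "Berlin"}{"city": "Paris"}'
    if "}{" in args:
        args = args.split("}{", 1)[0] + "}"
    # args == '{"city": "Berlin"}' -- json.loads now parses the first call only

    # Caveat: a literal "}{" inside a string value gets truncated incorrectly:
    # '{"text": "}{"}' becomes '{"text": "}', which is no longer valid JSON.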
@@ -859,6 +889,7 @@
  tool_args=tool_args,
  agent_state=agent_state,
  agent_step_span=agent_step_span,
+ step_id=step_id,
  )
  log_telemetry(
  self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
@@ -926,7 +957,6 @@
  add_heartbeat_request_system_message=continue_stepping,
  reasoning_content=reasoning_content,
  pre_computed_assistant_message_id=pre_computed_assistant_message_id,
- pre_computed_tool_message_id=pre_computed_tool_message_id,
  step_id=logged_step.id if logged_step else None, # TODO (cliandy): eventually move over other agent loops
  )
 
@@ -937,10 +967,15 @@
 
  @trace_method
  async def _execute_tool(
- self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None
+ self,
+ tool_name: str,
+ tool_args: JsonDict,
+ agent_state: AgentState,
+ agent_step_span: Optional["Span"] = None,
+ step_id: str | None = None,
  ) -> "ToolExecutionResult":
  """
- Executes a tool and returns (result, success_flag).
+ Executes a tool and returns the ToolExecutionResult.
  """
  from letta.schemas.tool_execution_result import ToolExecutionResult
 
@@ -972,7 +1007,10 @@
  # TODO: Integrate sandbox result
  log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
  tool_execution_result = await tool_execution_manager.execute_tool_async(
- function_name=tool_name, function_args=tool_args, tool=target_tool
+ function_name=tool_name,
+ function_args=tool_args,
+ tool=target_tool,
+ step_id=step_id,
  )
  if agent_step_span:
  end_time = get_utc_timestamp_ns()
@@ -980,7 +1018,7 @@
  name="tool_execution_completed",
  attributes={
  "tool_name": target_tool.name,
- "duration_ms": (end_time - start_time) // 1_000_000,
+ "duration_ms": ns_to_ms((end_time - start_time)),
  "success": tool_execution_result.success_flag,
  "tool_type": target_tool.tool_type,
  "tool_id": target_tool.id,
letta/agents/letta_agent_batch.py CHANGED
@@ -16,6 +16,7 @@ from letta.llm_api.llm_client import LLMClient
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG
  from letta.log import get_logger
  from letta.orm.enums import ToolType
+ from letta.otel.tracing import log_event, trace_method
  from letta.schemas.agent import AgentState, AgentStepState
  from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
  from letta.schemas.job import JobUpdate
@@ -39,7 +40,6 @@ from letta.services.passage_manager import PassageManager
  from letta.services.sandbox_config_manager import SandboxConfigManager
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
  from letta.settings import tool_settings
- from letta.tracing import log_event, trace_method
 
  logger = get_logger(__name__)
 
@@ -551,7 +551,6 @@ class LettaAgentBatch(BaseAgent):
  add_heartbeat_request_system_message=False,
  reasoning_content=reasoning_content,
  pre_computed_assistant_message_id=None,
- pre_computed_tool_message_id=None,
  llm_batch_item_id=llm_batch_item_id,
  )
 
letta/agents/voice_agent.py CHANGED
@@ -1,3 +1,4 @@
+ import asyncio
  import json
  import uuid
  from datetime import datetime, timedelta, timezone
@@ -81,8 +82,8 @@
  self.summary_block_label = "human"
 
  # Cached archival memory/message size
- self.num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_id)
- self.num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_id)
+ self.num_messages = None
+ self.num_archival_memories = None
 
  def init_summarizer(self, agent_state: AgentState) -> Summarizer:
  if not agent_state.multi_agent_group:
@@ -118,13 +119,12 @@
  Main streaming loop that yields partial tokens.
  Whenever we detect a tool call, we yield from _handle_ai_response as well.
  """
- print("CALL STREAM")
  if len(input_messages) != 1 or input_messages[0].role != MessageRole.user:
  raise ValueError(f"Voice Agent was invoked with multiple input messages or message did not have role `user`: {input_messages}")
 
  user_query = input_messages[0].content[0].text
 
- agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
+ agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)
 
  # TODO: Refactor this so it uses our in-house clients
  # TODO: For now, piggyback off of OpenAI client for ease
@@ -140,7 +140,7 @@
 
  summarizer = self.init_summarizer(agent_state=agent_state)
 
- in_context_messages = self.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=self.actor)
+ in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
  memory_edit_timestamp = get_utc_time()
  in_context_messages[0].content[0].text = compile_system_message(
  system_prompt=agent_state.system,
@@ -183,10 +183,6 @@
  # Rebuild context window if desired
  await self._rebuild_context_window(summarizer, in_context_messages, letta_message_db_queue)
 
- # TODO: This may be out of sync, if in between steps users add files
- self.num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
- self.num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
-
  yield "data: [DONE]\n\n"
 
  async def _handle_ai_response(
@@ -286,14 +282,14 @@
  async def _rebuild_context_window(
  self, summarizer: Summarizer, in_context_messages: List[Message], letta_message_db_queue: List[Message]
  ) -> None:
- new_letta_messages = self.message_manager.create_many_messages(letta_message_db_queue, actor=self.actor)
+ new_letta_messages = await self.message_manager.create_many_messages_async(letta_message_db_queue, actor=self.actor)
 
  # TODO: Make this more general and configurable, less brittle
  new_in_context_messages, updated = summarizer.summarize(
  in_context_messages=in_context_messages, new_letta_messages=new_letta_messages
  )
 
- self.agent_manager.set_in_context_messages(
+ await self.agent_manager.set_in_context_messages_async(
  agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
  )
 
@@ -301,9 +297,19 @@
  self,
  in_context_messages: List[Message],
  agent_state: AgentState,
- num_messages: int | None = None,
- num_archival_memories: int | None = None,
  ) -> List[Message]:
+ self.num_messages, self.num_archival_memories = await asyncio.gather(
+ (
+ self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ if self.num_messages is None
+ else asyncio.sleep(0, result=self.num_messages)
+ ),
+ (
+ self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
+ if self.num_archival_memories is None
+ else asyncio.sleep(0, result=self.num_archival_memories)
+ ),
+ )
  return await super()._rebuild_memory_async(
  in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
  )
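The counts are now computed lazily on first use instead of eagerly (and synchronously) in the constructor. The `asyncio.sleep(0, result=...)` trick wraps an already-cached value in an awaitable so both branches of each conditional can be handed to asyncio.gather uniformly; asyncio.sleep resolves to its `result` argument. A standalone sketch of the pattern with illustrative names:

    import asyncio

    cached: int | None = 42  # pretend a previous step already computed this

    async def expensive_count() -> int:
        await asyncio.sleep(0.1)  # stands in for a real database query
        return 7

    async def main():
        count, = await asyncio.gather(
            expensive_count() if cached is None
            else asyncio.sleep(0, result=cached)  # resolves immediately to the cached value
        )
        print(count)  # 42 -- no query issued

    asyncio.run(main())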