letta-nightly 0.8.4.dev20250615104252__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (51)
  1. letta/__init__.py +1 -0
  2. letta/agents/base_agent.py +12 -1
  3. letta/agents/helpers.py +5 -2
  4. letta/agents/letta_agent.py +98 -61
  5. letta/agents/voice_sleeptime_agent.py +2 -1
  6. letta/constants.py +3 -5
  7. letta/data_sources/redis_client.py +30 -10
  8. letta/functions/function_sets/files.py +4 -4
  9. letta/functions/helpers.py +6 -1
  10. letta/functions/mcp_client/types.py +95 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +2 -1
  12. letta/helpers/decorators.py +91 -0
  13. letta/interfaces/anthropic_streaming_interface.py +11 -0
  14. letta/interfaces/openai_streaming_interface.py +244 -225
  15. letta/llm_api/openai_client.py +1 -1
  16. letta/local_llm/utils.py +5 -1
  17. letta/orm/enums.py +1 -0
  18. letta/orm/mcp_server.py +3 -0
  19. letta/orm/tool.py +3 -0
  20. letta/otel/metric_registry.py +12 -0
  21. letta/otel/metrics.py +16 -7
  22. letta/schemas/letta_response.py +6 -1
  23. letta/schemas/letta_stop_reason.py +22 -0
  24. letta/schemas/mcp.py +48 -6
  25. letta/schemas/openai/chat_completion_request.py +1 -1
  26. letta/schemas/openai/chat_completion_response.py +1 -1
  27. letta/schemas/pip_requirement.py +14 -0
  28. letta/schemas/sandbox_config.py +1 -19
  29. letta/schemas/tool.py +5 -0
  30. letta/server/rest_api/json_parser.py +39 -3
  31. letta/server/rest_api/routers/v1/tools.py +3 -1
  32. letta/server/rest_api/routers/v1/voice.py +2 -3
  33. letta/server/rest_api/utils.py +1 -1
  34. letta/server/server.py +11 -2
  35. letta/services/agent_manager.py +37 -29
  36. letta/services/helpers/tool_execution_helper.py +39 -9
  37. letta/services/mcp/base_client.py +13 -2
  38. letta/services/mcp/sse_client.py +8 -1
  39. letta/services/mcp/streamable_http_client.py +56 -0
  40. letta/services/mcp_manager.py +23 -9
  41. letta/services/message_manager.py +30 -3
  42. letta/services/tool_executor/files_tool_executor.py +2 -3
  43. letta/services/tool_sandbox/e2b_sandbox.py +53 -3
  44. letta/services/tool_sandbox/local_sandbox.py +3 -1
  45. letta/services/user_manager.py +22 -0
  46. letta/settings.py +3 -0
  47. {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
  48. {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
  49. {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.enums import JobStatus
  from letta.schemas.file import FileMetadata
  from letta.schemas.job import Job
  from letta.schemas.letta_message import LettaMessage
+ from letta.schemas.letta_stop_reason import LettaStopReason
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary
  from letta.schemas.message import Message
letta/agents/base_agent.py CHANGED
@@ -12,7 +12,9 @@ from letta.schemas.enums import MessageStreamStatus
  from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
  from letta.schemas.letta_message_content import TextContent
  from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.message import Message, MessageCreate, MessageUpdate
+ from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
  from letta.services.agent_manager import AgentManager
  from letta.services.helpers.agent_manager_helper import compile_system_message
@@ -116,7 +118,7 @@ class BaseAgent(ABC):
  system_prompt=agent_state.system,
  in_context_memory=agent_state.memory,
  in_context_memory_last_edit=memory_edit_timestamp,
- previous_message_count=num_messages,
+ previous_message_count=num_messages - len(in_context_messages),
  archival_memory_size=num_archival_memories,
  tool_rules_solver=tool_rules_solver,
  )
@@ -136,3 +138,12 @@ class BaseAgent(ABC):
  except:
  logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
  raise
+
+ def get_finish_chunks_for_stream(self, usage: LettaUsageStatistics, stop_reason: Optional[LettaStopReason] = None):
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ return [
+ stop_reason.model_dump_json(),
+ usage.model_dump_json(),
+ MessageStreamStatus.done.value,
+ ]
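
The new BaseAgent.get_finish_chunks_for_stream helper standardizes the three terminal SSE payloads that the streaming endpoints now emit: a stop reason, the accumulated usage statistics, and the stream-done marker. Below is a minimal sketch of what a consumer would see at the end of a stream; it reuses only the constructors and calls shown in the diff above, and the zeroed LettaUsageStatistics is illustrative rather than real output.

    from letta.schemas.enums import MessageStreamStatus
    from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
    from letta.schemas.usage import LettaUsageStatistics

    # Default finish sequence when no error occurred (stop_reason falls back to end_turn).
    stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
    usage = LettaUsageStatistics()  # illustrative zeroed counters

    for chunk in [stop_reason.model_dump_json(), usage.model_dump_json(), MessageStreamStatus.done.value]:
        # mirrors the `yield f"data: {finish_chunk}\n\n"` lines in letta_agent.py
        print(f"data: {chunk}\n")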
letta/agents/helpers.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional, Tuple
  from letta.schemas.agent import AgentState
  from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
@@ -16,6 +17,7 @@ def _create_letta_response(
  new_in_context_messages: list[Message],
  use_assistant_message: bool,
  usage: LettaUsageStatistics,
+ stop_reason: Optional[LettaStopReason] = None,
  include_return_message_types: Optional[List[MessageType]] = None,
  ) -> LettaResponse:
  """
@@ -32,8 +34,9 @@
  # Apply message type filtering if specified
  if include_return_message_types is not None:
  response_messages = [msg for msg in response_messages if msg.message_type in include_return_message_types]
-
- return LettaResponse(messages=response_messages, usage=usage)
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ return LettaResponse(messages=response_messages, stop_reason=stop_reason, usage=usage)


  def _prepare_in_context_messages(
letta/agents/letta_agent.py CHANGED
@@ -5,6 +5,7 @@ from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union

  from openai import AsyncStream
  from openai.types.chat import ChatCompletionChunk
+ from opentelemetry.trace import Span

  from letta.agents.base_agent import BaseAgent
  from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
@@ -25,10 +26,11 @@ from letta.otel.context import get_ctx_attributes
  from letta.otel.metric_registry import MetricRegistry
  from letta.otel.tracing import log_event, trace_method, tracer
  from letta.schemas.agent import AgentState
- from letta.schemas.enums import MessageRole, MessageStreamStatus
+ from letta.schemas.enums import MessageRole
  from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
  from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
@@ -124,7 +126,7 @@ class LettaAgent(BaseAgent):
  agent_state = await self.agent_manager.get_agent_by_id_async(
  agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
  )
- _, new_in_context_messages, usage = await self._step(
+ _, new_in_context_messages, usage, stop_reason = await self._step(
  agent_state=agent_state,
  input_messages=input_messages,
  max_steps=max_steps,
@@ -133,6 +135,7 @@
  return _create_letta_response(
  new_in_context_messages=new_in_context_messages,
  use_assistant_message=use_assistant_message,
+ stop_reason=stop_reason,
  usage=usage,
  include_return_message_types=include_return_message_types,
  )
@@ -159,6 +162,7 @@
  put_inner_thoughts_first=True,
  actor=self.actor,
  )
+ stop_reason = None
  usage = LettaUsageStatistics()

  # span for request
@@ -178,17 +182,13 @@
  agent_state,
  llm_client,
  tool_rules_solver,
+ agent_step_span,
  )
  )
  in_context_messages = current_in_context_messages + new_in_context_messages

  log_event("agent.stream_no_tokens.llm_response.received") # [3^]

- # log llm request time
- now = get_utc_timestamp_ns()
- llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
  response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

  # update usage
@@ -197,6 +197,9 @@
  usage.completion_tokens += response.usage.completion_tokens
  usage.prompt_tokens += response.usage.prompt_tokens
  usage.total_tokens += response.usage.total_tokens
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )

  if not response.choices[0].message.tool_calls:
  # TODO: make into a real error
@@ -210,18 +213,15 @@
  signature=response.choices[0].message.reasoning_content_signature,
  )
  ]
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
  elif response.choices[0].message.content:
  reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
  else:
  logger.info("No reasoning content found.")
  reasoning = None

- # log LLM request time
- now = get_utc_timestamp_ns()
- llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
- persisted_messages, should_continue = await self._handle_ai_response(
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
  tool_call,
  valid_tool_names,
  agent_state,
@@ -262,11 +262,11 @@
  )

  for message in letta_messages:
- if not include_return_message_types:
- yield f"data: {message.model_dump_json()}\n\n"
- elif include_return_message_types and message.message_type in include_return_message_types:
+ if include_return_message_types is None or message.message_type in include_return_message_types:
  yield f"data: {message.model_dump_json()}\n\n"

+ MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
  if not should_continue:
  break

@@ -288,8 +288,8 @@
  request_span.end()

  # Return back usage
- yield f"data: {usage.model_dump_json()}\n\n"
- yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
+ for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
+ yield f"data: {finish_chunk}\n\n"

  async def _step(
  self,
@@ -297,7 +297,7 @@
  input_messages: List[MessageCreate],
  max_steps: int = DEFAULT_MAX_STEPS,
  request_start_timestamp_ns: Optional[int] = None,
- ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
+ ) -> Tuple[List[Message], List[Message], Optional[LettaStopReason], LettaUsageStatistics]:
  """
  Carries out an invocation of the agent loop. In each step, the agent
  1. Rebuilds its memory
@@ -320,6 +320,7 @@
  request_span = tracer.start_span("time_to_first_token")
  request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

+ stop_reason = None
  usage = LettaUsageStatistics()
  for i in range(max_steps):
  step_id = generate_step_id()
@@ -329,7 +330,7 @@

  request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
  await self._build_and_request_from_llm(
- current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
+ current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver, agent_step_span
  )
  )
  in_context_messages = current_in_context_messages + new_in_context_messages
@@ -338,16 +339,14 @@

  response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

- # log LLM request time
- now = get_utc_timestamp_ns()
- llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
  # TODO: add run_id
  usage.step_count += 1
  usage.completion_tokens += response.usage.completion_tokens
  usage.prompt_tokens += response.usage.prompt_tokens
  usage.total_tokens += response.usage.total_tokens
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )

  if not response.choices[0].message.tool_calls:
  # TODO: make into a real error
@@ -363,11 +362,13 @@
  ]
  elif response.choices[0].message.content:
  reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
  else:
  logger.info("No reasoning content found.")
  reasoning = None

- persisted_messages, should_continue = await self._handle_ai_response(
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
  tool_call,
  valid_tool_names,
  agent_state,
@@ -401,6 +402,8 @@
  ),
  )

+ MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
  if not should_continue:
  break

@@ -421,7 +424,7 @@
  force=False,
  )

- return current_in_context_messages, new_in_context_messages, usage
+ return current_in_context_messages, new_in_context_messages, usage, stop_reason

  @trace_method
  async def step_stream(
@@ -454,30 +457,35 @@
  put_inner_thoughts_first=True,
  actor=self.actor,
  )
+ stop_reason = None
  usage = LettaUsageStatistics()
  first_chunk, request_span = True, None
  if request_start_timestamp_ns:
  request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
  request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

- provider_request_start_timestamp_ns = None
  for i in range(max_steps):
  step_id = generate_step_id()
  step_start = get_utc_timestamp_ns()
  agent_step_span = tracer.start_span("agent_step", start_time=step_start)
  agent_step_span.set_attributes({"step_id": step_id})

- request_data, stream, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm_streaming(
- first_chunk,
- agent_step_span,
- request_start_timestamp_ns,
- current_in_context_messages,
- new_in_context_messages,
- agent_state,
- llm_client,
- tool_rules_solver,
- )
+ (
+ request_data,
+ stream,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ provider_request_start_timestamp_ns,
+ ) = await self._build_and_request_from_llm_streaming(
+ first_chunk,
+ agent_step_span,
+ request_start_timestamp_ns,
+ current_in_context_messages,
+ new_in_context_messages,
+ agent_state,
+ llm_client,
+ tool_rules_solver,
  )
  log_event("agent.stream.llm_response.received") # [3^]

@@ -504,15 +512,17 @@
  now = get_utc_timestamp_ns()
  ttft_ns = now - request_start_timestamp_ns
  request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+ metric_attributes = get_ctx_attributes()
+ metric_attributes["model.name"] = agent_state.llm_config.model
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
  first_chunk = False

- if include_return_message_types is None:
- # return all data
- yield f"data: {chunk.model_dump_json()}\n\n"
- elif include_return_message_types and chunk.message_type in include_return_message_types:
+ if include_return_message_types is None or chunk.message_type in include_return_message_types:
  # filter down returned data
  yield f"data: {chunk.model_dump_json()}\n\n"

+ stream_end_time_ns = get_utc_timestamp_ns()
+
  # update usage
  usage.step_count += 1
  usage.completion_tokens += interface.output_tokens
@@ -523,14 +533,26 @@
  )

  # log LLM request time
- now = get_utc_timestamp_ns()
- llm_request_ns = now - step_start
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
+ llm_request_ms = ns_to_ms(stream_end_time_ns - request_start_timestamp_ns)
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+ MetricRegistry().llm_execution_time_ms_histogram.record(
+ llm_request_ms,
+ dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+ )

  # Process resulting stream content
- tool_call = interface.get_tool_call_object()
+ try:
+ tool_call = interface.get_tool_call_object()
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ yield f"data: {stop_reason.model_dump_json()}\n\n"
+ raise e
+ except Exception as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+ yield f"data: {stop_reason.model_dump_json()}\n\n"
+ raise e
  reasoning_content = interface.get_reasoning_content()
- persisted_messages, should_continue = await self._handle_ai_response(
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
  tool_call,
  valid_tool_names,
  agent_state,
@@ -590,6 +612,9 @@
  if include_return_message_types is None or tool_return.message_type in include_return_message_types:
  yield f"data: {tool_return.model_dump_json()}\n\n"

+ # TODO (cliandy): consolidate and expand with trace
+ MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
  if not should_continue:
  break

@@ -610,10 +635,10 @@
  request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
  request_span.end()

- # TODO: Also yield out a letta usage stats SSE
- yield f"data: {usage.model_dump_json()}\n\n"
- yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
+ for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
+ yield f"data: {finish_chunk}\n\n"

+ # noinspection PyInconsistentReturns
  async def _build_and_request_from_llm(
  self,
  current_in_context_messages: List[Message],
@@ -621,7 +646,8 @@
  agent_state: AgentState,
  llm_client: LLMClientBase,
  tool_rules_solver: ToolRulesSolver,
- ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]]:
+ agent_step_span: "Span",
+ ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]] | None:
  for attempt in range(self.max_summarization_retries + 1):
  try:
  log_event("agent.stream_no_tokens.messages.refreshed")
@@ -635,13 +661,15 @@
  log_event("agent.stream_no_tokens.llm_request.created")

  async with AsyncTimer() as timer:
+ # Attempt LLM request
  response = await llm_client.request_async(request_data, agent_state.llm_config)
  MetricRegistry().llm_execution_time_ms_histogram.record(
  timer.elapsed_ms,
  dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
  )
- # Attempt LLM request
- return (request_data, response, current_in_context_messages, new_in_context_messages, valid_tool_names)
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": timer.elapsed_ms})
+
+ return request_data, response, current_in_context_messages, new_in_context_messages, valid_tool_names

  except Exception as e:
  if attempt == self.max_summarization_retries:
@@ -659,6 +687,7 @@
  new_in_context_messages = []
  log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")

+ # noinspection PyInconsistentReturns
  async def _build_and_request_from_llm_streaming(
  self,
  first_chunk: bool,
@@ -669,7 +698,7 @@
  agent_state: AgentState,
  llm_client: LLMClientBase,
  tool_rules_solver: ToolRulesSolver,
- ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str]]:
+ ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str], int] | None:
  for attempt in range(self.max_summarization_retries + 1):
  try:
  log_event("agent.stream_no_tokens.messages.refreshed")
@@ -682,10 +711,13 @@
  )
  log_event("agent.stream.llm_request.created") # [2^]

+ provider_request_start_timestamp_ns = get_utc_timestamp_ns()
  if first_chunk and ttft_span is not None:
- provider_request_start_timestamp_ns = get_utc_timestamp_ns()
- provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
- ttft_span.add_event(name="provider_req_start_ns", attributes={"provider_req_start_ms": ns_to_ms(provider_req_start_ns)})
+ request_start_to_provider_request_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
+ ttft_span.add_event(
+ name="request_start_to_provider_request_start_ns",
+ attributes={"request_start_to_provider_request_start_ns": ns_to_ms(request_start_to_provider_request_start_ns)},
+ )

  # Attempt LLM request
  return (
@@ -694,6 +726,7 @@
  current_in_context_messages,
  new_in_context_messages,
  valid_tool_names,
+ provider_request_start_timestamp_ns,
  )

  except Exception as e:
@@ -709,7 +742,7 @@
  llm_config=agent_state.llm_config,
  force=True,
  )
- new_in_context_messages = []
+ new_in_context_messages: list[Message] = []
  log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")

  @trace_method
@@ -857,12 +890,13 @@
  initial_messages: Optional[List[Message]] = None,
  agent_step_span: Optional["Span"] = None,
  is_final_step: Optional[bool] = None,
- ) -> Tuple[List[Message], bool]:
+ ) -> Tuple[List[Message], bool, Optional[LettaStopReason]]:
  """
  Now that streaming is done, handle the final AI response.
  This might yield additional SSE tokens if we do stalling.
  At the end, set self._continue_execution accordingly.
  """
+ stop_reason = None
  # Check if the called tool is allowed by tool name:
  tool_call_name = tool_call.function.name
  tool_call_args_str = tool_call.function.arguments
@@ -880,6 +914,7 @@
  tool_args = json.loads(tool_args)

  if is_final_step:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
  logger.info("Agent has reached max steps.")
  request_heartbeat = False
  else:
@@ -948,6 +983,8 @@
  continue_stepping = request_heartbeat
  tool_rules_solver.register_tool_call(tool_name=tool_call_name)
  if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+ if continue_stepping:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
  continue_stepping = False
  elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
  continue_stepping = True
@@ -994,7 +1031,7 @@
  )
  self.last_function_response = function_response

- return persisted_messages, continue_stepping
+ return persisted_messages, continue_stepping, stop_reason

  @trace_method
  async def _execute_tool(
letta/agents/voice_sleeptime_agent.py CHANGED
@@ -82,7 +82,7 @@ class VoiceSleeptimeAgent(LettaAgent):
  ]

  # Summarize
- current_in_context_messages, new_in_context_messages, usage = await super()._step(
+ current_in_context_messages, new_in_context_messages, usage, stop_reason = await super()._step(
  agent_state=agent_state, input_messages=input_messages, max_steps=max_steps
  )
  new_in_context_messages, updated = self.summarizer.summarize(
@@ -95,6 +95,7 @@
  return _create_letta_response(
  new_in_context_messages=new_in_context_messages,
  use_assistant_message=use_assistant_message,
+ stop_reason=stop_reason,
  usage=usage,
  include_return_message_types=include_return_message_types,
  )
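
With stop_reason now threaded through _step and _handle_ai_response, non-streaming callers also get it back on LettaResponse. A hedged sketch of inspecting it follows; the agent object and the step call signature are assumptions, while the stop_reason field and the StopReasonType members (end_turn, max_steps, tool_rule, no_tool_call, invalid_tool_call) come from this diff.

    # Hypothetical caller: `agent` stands in for a constructed LettaAgent and the
    # call signature is assumed; only the response fields come from this diff.
    async def handle(agent, input_messages):
        response = await agent.step(input_messages=input_messages)
        print(response.usage.step_count, response.usage.total_tokens)
        # stop_reason is end_turn by default, or max_steps / tool_rule /
        # no_tool_call / invalid_tool_call when the run ended early.
        print(response.stop_reason.model_dump_json())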
letta/constants.py CHANGED
@@ -292,9 +292,6 @@ MESSAGE_SUMMARY_WARNING_STR = " ".join(
  # "Remember to pass request_heartbeat = true if you would like to send a message immediately after.",
  ]
  )
- DATA_SOURCE_ATTACH_ALERT = (
- "[ALERT] New data was just uploaded to archival memory. You can view this data by calling the archival_memory_search tool."
- )

  # Throw an error message when a read-only block is edited
  READ_ONLY_BLOCK_EDIT_ERROR = f"{ERROR_MESSAGE_PREFIX} This block is read-only and cannot be edited."
@@ -337,6 +334,7 @@ WEB_SEARCH_CLIP_CONTENT = False
  WEB_SEARCH_INCLUDE_SCORE = False
  WEB_SEARCH_SEPARATOR = "\n" + "-" * 40 + "\n"

- REDIS_INCLUDE = "INCLUDE"
- REDIS_EXCLUDE = "EXCLUDE"
+ REDIS_INCLUDE = "include"
+ REDIS_EXCLUDE = "exclude"
  REDIS_SET_DEFAULT_VAL = "None"
+ REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
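
The Redis inclusion/exclusion markers are now lowercase, a default cache prefix is introduced, and, in the redis_client.py diff below, the group keys switch from underscore- to colon-delimited form built by the _get_group_inclusion_key / _get_group_exclusion_key helpers. A small sketch of the resulting key shapes follows; the group name and the prefix usage are hypothetical illustrations, only the constant values and key format come from this diff.

    REDIS_INCLUDE = "include"
    REDIS_EXCLUDE = "exclude"
    REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"

    def group_inclusion_key(group: str) -> str:
        # mirrors AsyncRedisClient._get_group_inclusion_key in the diff below
        return f"{group}:{REDIS_INCLUDE}"

    print(group_inclusion_key("my_feature_group"))   # -> "my_feature_group:include" (hypothetical group name)
    print(f"{REDIS_DEFAULT_CACHE_PREFIX}:some_key")  # -> "letta_cache:some_key" (illustrative use of the prefix)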
letta/data_sources/redis_client.py CHANGED
@@ -2,12 +2,17 @@ import asyncio
  from functools import wraps
  from typing import Any, Optional, Set, Union

- import redis.asyncio as redis
- from redis import RedisError
-
  from letta.constants import REDIS_EXCLUDE, REDIS_INCLUDE, REDIS_SET_DEFAULT_VAL
  from letta.log import get_logger

+ try:
+ from redis import RedisError
+ from redis.asyncio import ConnectionPool, Redis
+ except ImportError:
+ RedisError = None
+ Redis = None
+ ConnectionPool = None
+
  logger = get_logger(__name__)

  _client_instance = None
@@ -44,7 +49,7 @@ class AsyncRedisClient:
  retry_on_timeout: Retry operations on timeout
  health_check_interval: Seconds between health checks
  """
- self.pool = redis.ConnectionPool(
+ self.pool = ConnectionPool(
  host=host,
  port=port,
  db=db,
@@ -59,12 +64,12 @@
  self._client = None
  self._lock = asyncio.Lock()

- async def get_client(self) -> redis.Redis:
+ async def get_client(self) -> Redis:
  """Get or create Redis client instance."""
  if self._client is None:
  async with self._lock:
  if self._client is None:
- self._client = redis.Redis(connection_pool=self.pool)
+ self._client = Redis(connection_pool=self.pool)
  return self._client

  async def close(self):
@@ -213,8 +218,8 @@
  return await client.decr(key)

  async def check_inclusion_and_exclusion(self, member: str, group: str) -> bool:
- exclude_key = f"{group}_{REDIS_EXCLUDE}"
- include_key = f"{group}_{REDIS_INCLUDE}"
+ exclude_key = self._get_group_exclusion_key(group)
+ include_key = self._get_group_inclusion_key(group)
  # 1. if the member IS excluded from the group
  if self.exists(exclude_key) and await self.scard(exclude_key) > 1:
  return bool(await self.smismember(exclude_key, member))
@@ -231,14 +236,29 @@

  @staticmethod
  def _get_group_inclusion_key(group: str) -> str:
- return f"{group}_{REDIS_INCLUDE}"
+ return f"{group}:{REDIS_INCLUDE}"

  @staticmethod
  def _get_group_exclusion_key(group: str) -> str:
- return f"{group}_{REDIS_EXCLUDE}"
+ return f"{group}:{REDIS_EXCLUDE}"


  class NoopAsyncRedisClient(AsyncRedisClient):
+ # noinspection PyMissingConstructor
+ def __init__(self):
+ pass
+
+ async def set(
+ self,
+ key: str,
+ value: Union[str, int, float],
+ ex: Optional[int] = None,
+ px: Optional[int] = None,
+ nx: bool = False,
+ xx: bool = False,
+ ) -> bool:
+ return False
+
  async def get(self, key: str, default: Any = None) -> Any:
  return default

letta/functions/function_sets/files.py CHANGED
@@ -7,10 +7,10 @@ if TYPE_CHECKING:

  async def open_file(agent_state: "AgentState", file_name: str, view_range: Optional[Tuple[int, int]]) -> str:
  """
- Open up a file in core memory.
+ Open the file with name `file_name` and load the contents into files section in core memory.

  Args:
- file_name (str): Name of the file to view.
+ file_name (str): Name of the file to view. Required.
  view_range (Optional[Tuple[int, int]]): Optional tuple indicating range to view.

  Returns:
@@ -21,7 +21,7 @@ async def open_file(agent_state: "AgentState", file_name: str, view_range: Optio

  async def close_file(agent_state: "AgentState", file_name: str) -> str:
  """
- Close a file in core memory.
+ Close file with name `file_name` in files section in core memory.

  Args:
  file_name (str): Name of the file to close.
@@ -48,7 +48,7 @@ async def grep(agent_state: "AgentState", pattern: str, include: Optional[str] =

  async def search_files(agent_state: "AgentState", query: str) -> List["FileMetadata"]:
  """
- Get list of most relevant files across all data sources.
+ Get list of most relevant files across all data sources using embedding search.

  Args:
  query (str): The search query.