letta-nightly 0.8.4.dev20250615104252__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl
This diff shows the changes between two publicly available package versions released to one of the supported registries. The information is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- letta/__init__.py +1 -0
- letta/agents/base_agent.py +12 -1
- letta/agents/helpers.py +5 -2
- letta/agents/letta_agent.py +98 -61
- letta/agents/voice_sleeptime_agent.py +2 -1
- letta/constants.py +3 -5
- letta/data_sources/redis_client.py +30 -10
- letta/functions/function_sets/files.py +4 -4
- letta/functions/helpers.py +6 -1
- letta/functions/mcp_client/types.py +95 -0
- letta/groups/sleeptime_multi_agent_v2.py +2 -1
- letta/helpers/decorators.py +91 -0
- letta/interfaces/anthropic_streaming_interface.py +11 -0
- letta/interfaces/openai_streaming_interface.py +244 -225
- letta/llm_api/openai_client.py +1 -1
- letta/local_llm/utils.py +5 -1
- letta/orm/enums.py +1 -0
- letta/orm/mcp_server.py +3 -0
- letta/orm/tool.py +3 -0
- letta/otel/metric_registry.py +12 -0
- letta/otel/metrics.py +16 -7
- letta/schemas/letta_response.py +6 -1
- letta/schemas/letta_stop_reason.py +22 -0
- letta/schemas/mcp.py +48 -6
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/openai/chat_completion_response.py +1 -1
- letta/schemas/pip_requirement.py +14 -0
- letta/schemas/sandbox_config.py +1 -19
- letta/schemas/tool.py +5 -0
- letta/server/rest_api/json_parser.py +39 -3
- letta/server/rest_api/routers/v1/tools.py +3 -1
- letta/server/rest_api/routers/v1/voice.py +2 -3
- letta/server/rest_api/utils.py +1 -1
- letta/server/server.py +11 -2
- letta/services/agent_manager.py +37 -29
- letta/services/helpers/tool_execution_helper.py +39 -9
- letta/services/mcp/base_client.py +13 -2
- letta/services/mcp/sse_client.py +8 -1
- letta/services/mcp/streamable_http_client.py +56 -0
- letta/services/mcp_manager.py +23 -9
- letta/services/message_manager.py +30 -3
- letta/services/tool_executor/files_tool_executor.py +2 -3
- letta/services/tool_sandbox/e2b_sandbox.py +53 -3
- letta/services/tool_sandbox/local_sandbox.py +3 -1
- letta/services/user_manager.py +22 -0
- letta/settings.py +3 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.enums import JobStatus
 from letta.schemas.file import FileMetadata
 from letta.schemas.job import Job
 from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_stop_reason import LettaStopReason
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary
 from letta.schemas.message import Message
letta/agents/base_agent.py
CHANGED
@@ -12,7 +12,9 @@ from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
+from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message, MessageCreate, MessageUpdate
+from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.helpers.agent_manager_helper import compile_system_message
@@ -116,7 +118,7 @@ class BaseAgent(ABC):
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
-            previous_message_count=num_messages,
+            previous_message_count=num_messages - len(in_context_messages),
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
         )
@@ -136,3 +138,12 @@ class BaseAgent(ABC):
         except:
             logger.exception(f"Failed to rebuild memory for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name})")
             raise
+
+    def get_finish_chunks_for_stream(self, usage: LettaUsageStatistics, stop_reason: Optional[LettaStopReason] = None):
+        if stop_reason is None:
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+        return [
+            stop_reason.model_dump_json(),
+            usage.model_dump_json(),
+            MessageStreamStatus.done.value,
+        ]
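The new `get_finish_chunks_for_stream` helper standardizes how a streaming run terminates: a stop-reason chunk, then usage statistics, then the done marker, each emitted as its own SSE `data:` line by the callers added in `letta/agents/letta_agent.py`. A minimal sketch of that termination pattern, using plain dicts and a `[DONE]` sentinel in place of Letta's `LettaStopReason`, `LettaUsageStatistics`, and `MessageStreamStatus` types:

```python
import json
from typing import Iterator, Optional


def get_finish_chunks_for_stream(usage: dict, stop_reason: Optional[dict] = None) -> list[str]:
    # Default to an "end_turn" stop reason when the loop ended normally,
    # mirroring the fallback in BaseAgent.get_finish_chunks_for_stream.
    if stop_reason is None:
        stop_reason = {"message_type": "stop_reason", "stop_reason": "end_turn"}
    return [json.dumps(stop_reason), json.dumps(usage), "[DONE]"]


def finish_sse(usage: dict, stop_reason: Optional[dict] = None) -> Iterator[str]:
    # Each finish chunk becomes its own SSE event, as in the diff:
    #   for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
    #       yield f"data: {finish_chunk}\n\n"
    for chunk in get_finish_chunks_for_stream(usage, stop_reason):
        yield f"data: {chunk}\n\n"


if __name__ == "__main__":
    for line in finish_sse({"completion_tokens": 12, "prompt_tokens": 80, "total_tokens": 92}):
        print(line, end="")
```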
letta/agents/helpers.py
CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional, Tuple
 from letta.schemas.agent import AgentState
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_response import LettaResponse
+from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
@@ -16,6 +17,7 @@ def _create_letta_response(
     new_in_context_messages: list[Message],
     use_assistant_message: bool,
     usage: LettaUsageStatistics,
+    stop_reason: Optional[LettaStopReason] = None,
     include_return_message_types: Optional[List[MessageType]] = None,
 ) -> LettaResponse:
     """
@@ -32,8 +34,9 @@ def _create_letta_response(
     # Apply message type filtering if specified
     if include_return_message_types is not None:
         response_messages = [msg for msg in response_messages if msg.message_type in include_return_message_types]
-
-
+    if stop_reason is None:
+        stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+    return LettaResponse(messages=response_messages, stop_reason=stop_reason, usage=usage)
 
 
 def _prepare_in_context_messages(
letta/agents/letta_agent.py
CHANGED
@@ -5,6 +5,7 @@ from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
 
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
+from opentelemetry.trace import Span
 
 from letta.agents.base_agent import BaseAgent
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
@@ -25,10 +26,11 @@ from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import MessageRole
+from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
+from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
@@ -124,7 +126,7 @@ class LettaAgent(BaseAgent):
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
-        _, new_in_context_messages, usage = await self._step(
+        _, new_in_context_messages, usage, stop_reason = await self._step(
             agent_state=agent_state,
             input_messages=input_messages,
             max_steps=max_steps,
@@ -133,6 +135,7 @@ class LettaAgent(BaseAgent):
         return _create_letta_response(
             new_in_context_messages=new_in_context_messages,
             use_assistant_message=use_assistant_message,
+            stop_reason=stop_reason,
             usage=usage,
             include_return_message_types=include_return_message_types,
         )
@@ -159,6 +162,7 @@ class LettaAgent(BaseAgent):
             put_inner_thoughts_first=True,
             actor=self.actor,
         )
+        stop_reason = None
         usage = LettaUsageStatistics()
 
         # span for request
@@ -178,17 +182,13 @@ class LettaAgent(BaseAgent):
                     agent_state,
                     llm_client,
                     tool_rules_solver,
+                    agent_step_span,
                 )
             )
             in_context_messages = current_in_context_messages + new_in_context_messages
 
             log_event("agent.stream_no_tokens.llm_response.received")  # [3^]
 
-            # log llm request time
-            now = get_utc_timestamp_ns()
-            llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
             response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
             # update usage
@@ -197,6 +197,9 @@ class LettaAgent(BaseAgent):
             usage.completion_tokens += response.usage.completion_tokens
             usage.prompt_tokens += response.usage.prompt_tokens
             usage.total_tokens += response.usage.total_tokens
+            MetricRegistry().message_output_tokens.record(
+                response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+            )
 
             if not response.choices[0].message.tool_calls:
                 # TODO: make into a real error
@@ -210,18 +213,15 @@ class LettaAgent(BaseAgent):
                         signature=response.choices[0].message.reasoning_content_signature,
                     )
                 ]
+            elif response.choices[0].message.omitted_reasoning_content:
+                reasoning = [OmittedReasoningContent()]
            elif response.choices[0].message.content:
                 reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
             else:
                 logger.info("No reasoning content found.")
                 reasoning = None
 
-
-            now = get_utc_timestamp_ns()
-            llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
-            persisted_messages, should_continue = await self._handle_ai_response(
+            persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
                 tool_call,
                 valid_tool_names,
                 agent_state,
@@ -262,11 +262,11 @@ class LettaAgent(BaseAgent):
             )
 
             for message in letta_messages:
-                if
-                    yield f"data: {message.model_dump_json()}\n\n"
-                elif include_return_message_types and message.message_type in include_return_message_types:
+                if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield f"data: {message.model_dump_json()}\n\n"
 
+            MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
             if not should_continue:
                 break
 
@@ -288,8 +288,8 @@ class LettaAgent(BaseAgent):
         request_span.end()
 
         # Return back usage
-
-
+        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
+            yield f"data: {finish_chunk}\n\n"
 
     async def _step(
         self,
@@ -297,7 +297,7 @@ class LettaAgent(BaseAgent):
         input_messages: List[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
         request_start_timestamp_ns: Optional[int] = None,
-    ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
+    ) -> Tuple[List[Message], List[Message], Optional[LettaStopReason], LettaUsageStatistics]:
         """
         Carries out an invocation of the agent loop. In each step, the agent
         1. Rebuilds its memory
@@ -320,6 +320,7 @@ class LettaAgent(BaseAgent):
         request_span = tracer.start_span("time_to_first_token")
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
+        stop_reason = None
         usage = LettaUsageStatistics()
         for i in range(max_steps):
             step_id = generate_step_id()
@@ -329,7 +330,7 @@ class LettaAgent(BaseAgent):
 
             request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
                 await self._build_and_request_from_llm(
-                    current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
+                    current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver, agent_step_span
                 )
             )
             in_context_messages = current_in_context_messages + new_in_context_messages
@@ -338,16 +339,14 @@ class LettaAgent(BaseAgent):
 
             response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
-            # log LLM request time
-            now = get_utc_timestamp_ns()
-            llm_request_ns = now - step_start
-            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
-
             # TODO: add run_id
             usage.step_count += 1
             usage.completion_tokens += response.usage.completion_tokens
             usage.prompt_tokens += response.usage.prompt_tokens
             usage.total_tokens += response.usage.total_tokens
+            MetricRegistry().message_output_tokens.record(
+                response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+            )
 
             if not response.choices[0].message.tool_calls:
                 # TODO: make into a real error
@@ -363,11 +362,13 @@ class LettaAgent(BaseAgent):
                 ]
             elif response.choices[0].message.content:
                 reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+            elif response.choices[0].message.omitted_reasoning_content:
+                reasoning = [OmittedReasoningContent()]
             else:
                 logger.info("No reasoning content found.")
                 reasoning = None
 
-            persisted_messages, should_continue = await self._handle_ai_response(
+            persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
                 tool_call,
                 valid_tool_names,
                 agent_state,
@@ -401,6 +402,8 @@ class LettaAgent(BaseAgent):
                 ),
             )
 
+            MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
             if not should_continue:
                 break
 
@@ -421,7 +424,7 @@ class LettaAgent(BaseAgent):
                 force=False,
             )
 
-        return current_in_context_messages, new_in_context_messages, usage
+        return current_in_context_messages, new_in_context_messages, usage, stop_reason
 
     @trace_method
     async def step_stream(
@@ -454,30 +457,35 @@ class LettaAgent(BaseAgent):
             put_inner_thoughts_first=True,
             actor=self.actor,
         )
+        stop_reason = None
         usage = LettaUsageStatistics()
         first_chunk, request_span = True, None
         if request_start_timestamp_ns:
             request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
             request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
-        provider_request_start_timestamp_ns = None
         for i in range(max_steps):
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
             agent_step_span.set_attributes({"step_id": step_id})
 
-
-
-
-
-
-
-
-
-
-
-
+            (
+                request_data,
+                stream,
+                current_in_context_messages,
+                new_in_context_messages,
+                valid_tool_names,
+                provider_request_start_timestamp_ns,
+            ) = await self._build_and_request_from_llm_streaming(
+                first_chunk,
+                agent_step_span,
+                request_start_timestamp_ns,
+                current_in_context_messages,
+                new_in_context_messages,
+                agent_state,
+                llm_client,
+                tool_rules_solver,
             )
             log_event("agent.stream.llm_response.received")  # [3^]
 
@@ -504,15 +512,17 @@ class LettaAgent(BaseAgent):
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - request_start_timestamp_ns
                     request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+                    metric_attributes = get_ctx_attributes()
+                    metric_attributes["model.name"] = agent_state.llm_config.model
+                    MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
                     first_chunk = False
 
-                if include_return_message_types is None:
-                    # return all data
-                    yield f"data: {chunk.model_dump_json()}\n\n"
-                elif include_return_message_types and chunk.message_type in include_return_message_types:
+                if include_return_message_types is None or chunk.message_type in include_return_message_types:
                     # filter down returned data
                     yield f"data: {chunk.model_dump_json()}\n\n"
 
+            stream_end_time_ns = get_utc_timestamp_ns()
+
             # update usage
             usage.step_count += 1
             usage.completion_tokens += interface.output_tokens
@@ -523,14 +533,26 @@ class LettaAgent(BaseAgent):
             )
 
             # log LLM request time
-
-
-
+            llm_request_ms = ns_to_ms(stream_end_time_ns - request_start_timestamp_ns)
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+            MetricRegistry().llm_execution_time_ms_histogram.record(
+                llm_request_ms,
+                dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+            )
 
             # Process resulting stream content
-
+            try:
+                tool_call = interface.get_tool_call_object()
+            except ValueError as e:
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                raise e
+            except Exception as e:
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                raise e
             reasoning_content = interface.get_reasoning_content()
-            persisted_messages, should_continue = await self._handle_ai_response(
+            persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
                 tool_call,
                 valid_tool_names,
                 agent_state,
@@ -590,6 +612,9 @@ class LettaAgent(BaseAgent):
                 if include_return_message_types is None or tool_return.message_type in include_return_message_types:
                     yield f"data: {tool_return.model_dump_json()}\n\n"
 
+            # TODO (cliandy): consolidate and expand with trace
+            MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+
             if not should_continue:
                 break
 
@@ -610,10 +635,10 @@ class LettaAgent(BaseAgent):
             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
             request_span.end()
 
-
-
-        yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
+        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
+            yield f"data: {finish_chunk}\n\n"
 
+    # noinspection PyInconsistentReturns
     async def _build_and_request_from_llm(
         self,
         current_in_context_messages: List[Message],
@@ -621,7 +646,8 @@ class LettaAgent(BaseAgent):
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-
+        agent_step_span: "Span",
+    ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -635,13 +661,15 @@ class LettaAgent(BaseAgent):
                 log_event("agent.stream_no_tokens.llm_request.created")
 
                 async with AsyncTimer() as timer:
+                    # Attempt LLM request
                     response = await llm_client.request_async(request_data, agent_state.llm_config)
                 MetricRegistry().llm_execution_time_ms_histogram.record(
                     timer.elapsed_ms,
                     dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
                 )
-
-
+                agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": timer.elapsed_ms})
+
+                return request_data, response, current_in_context_messages, new_in_context_messages, valid_tool_names
 
             except Exception as e:
                 if attempt == self.max_summarization_retries:
@@ -659,6 +687,7 @@ class LettaAgent(BaseAgent):
                 new_in_context_messages = []
                 log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
 
+    # noinspection PyInconsistentReturns
     async def _build_and_request_from_llm_streaming(
         self,
         first_chunk: bool,
@@ -669,7 +698,7 @@ class LettaAgent(BaseAgent):
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str]]:
+    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str], int] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -682,10 +711,13 @@ class LettaAgent(BaseAgent):
                 )
                 log_event("agent.stream.llm_request.created")  # [2^]
 
+                provider_request_start_timestamp_ns = get_utc_timestamp_ns()
                 if first_chunk and ttft_span is not None:
-
-
-
+                    request_start_to_provider_request_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
+                    ttft_span.add_event(
+                        name="request_start_to_provider_request_start_ns",
+                        attributes={"request_start_to_provider_request_start_ns": ns_to_ms(request_start_to_provider_request_start_ns)},
+                    )
 
                 # Attempt LLM request
                 return (
@@ -694,6 +726,7 @@ class LettaAgent(BaseAgent):
                     current_in_context_messages,
                     new_in_context_messages,
                     valid_tool_names,
+                    provider_request_start_timestamp_ns,
                 )
 
             except Exception as e:
@@ -709,7 +742,7 @@ class LettaAgent(BaseAgent):
                     llm_config=agent_state.llm_config,
                     force=True,
                 )
-                new_in_context_messages = []
+                new_in_context_messages: list[Message] = []
                 log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
 
     @trace_method
@@ -857,12 +890,13 @@ class LettaAgent(BaseAgent):
         initial_messages: Optional[List[Message]] = None,
         agent_step_span: Optional["Span"] = None,
         is_final_step: Optional[bool] = None,
-    ) -> Tuple[List[Message], bool]:
+    ) -> Tuple[List[Message], bool, Optional[LettaStopReason]]:
         """
         Now that streaming is done, handle the final AI response.
        This might yield additional SSE tokens if we do stalling.
         At the end, set self._continue_execution accordingly.
         """
+        stop_reason = None
         # Check if the called tool is allowed by tool name:
         tool_call_name = tool_call.function.name
         tool_call_args_str = tool_call.function.arguments
@@ -880,6 +914,7 @@ class LettaAgent(BaseAgent):
         tool_args = json.loads(tool_args)
 
         if is_final_step:
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
            logger.info("Agent has reached max steps.")
             request_heartbeat = False
         else:
@@ -948,6 +983,8 @@ class LettaAgent(BaseAgent):
             continue_stepping = request_heartbeat
             tool_rules_solver.register_tool_call(tool_name=tool_call_name)
             if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+                if continue_stepping:
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
                 continue_stepping = False
             elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
                 continue_stepping = True
@@ -994,7 +1031,7 @@ class LettaAgent(BaseAgent):
             )
             self.last_function_response = function_response
 
-        return persisted_messages, continue_stepping
+        return persisted_messages, continue_stepping, stop_reason
 
     @trace_method
     async def _execute_tool(
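A recurring pattern in the hunks above is per-step telemetry: LLM latency and output tokens are recorded as histograms whose attributes merge the request context with the model name, via `dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})`. A rough sketch of that idea using the plain OpenTelemetry metrics API; Letta's `MetricRegistry` wraps instruments like these, and the instrument names below are illustrative rather than its exact ones:

```python
from opentelemetry import metrics

# Without a configured MeterProvider these calls are no-ops, which keeps the sketch runnable.
meter = metrics.get_meter("letta.sketch")
llm_execution_time_ms = meter.create_histogram("llm_execution_time_ms", unit="ms")
message_output_tokens = meter.create_histogram("message_output_tokens", unit="token")


def record_llm_step(elapsed_ms: float, completion_tokens: int, ctx_attributes: dict, model_name: str) -> None:
    # Merge request-scoped attributes with the model name, as the diff does repeatedly.
    attributes = dict(ctx_attributes, **{"model.name": model_name})
    llm_execution_time_ms.record(elapsed_ms, attributes)
    message_output_tokens.record(completion_tokens, attributes)


record_llm_step(412.5, 128, {"agent.id": "agent-123"}, "gpt-4o-mini")
```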
letta/agents/voice_sleeptime_agent.py
CHANGED
@@ -82,7 +82,7 @@ class VoiceSleeptimeAgent(LettaAgent):
         ]
 
         # Summarize
-        current_in_context_messages, new_in_context_messages, usage = await super()._step(
+        current_in_context_messages, new_in_context_messages, usage, stop_reason = await super()._step(
             agent_state=agent_state, input_messages=input_messages, max_steps=max_steps
         )
         new_in_context_messages, updated = self.summarizer.summarize(
@@ -95,6 +95,7 @@ class VoiceSleeptimeAgent(LettaAgent):
         return _create_letta_response(
             new_in_context_messages=new_in_context_messages,
             use_assistant_message=use_assistant_message,
+            stop_reason=stop_reason,
             usage=usage,
             include_return_message_types=include_return_message_types,
         )
letta/constants.py
CHANGED
@@ -292,9 +292,6 @@ MESSAGE_SUMMARY_WARNING_STR = " ".join(
         # "Remember to pass request_heartbeat = true if you would like to send a message immediately after.",
     ]
 )
-DATA_SOURCE_ATTACH_ALERT = (
-    "[ALERT] New data was just uploaded to archival memory. You can view this data by calling the archival_memory_search tool."
-)
 
 # Throw an error message when a read-only block is edited
 READ_ONLY_BLOCK_EDIT_ERROR = f"{ERROR_MESSAGE_PREFIX} This block is read-only and cannot be edited."
@@ -337,6 +334,7 @@ WEB_SEARCH_CLIP_CONTENT = False
 WEB_SEARCH_INCLUDE_SCORE = False
 WEB_SEARCH_SEPARATOR = "\n" + "-" * 40 + "\n"
 
-REDIS_INCLUDE = "
-REDIS_EXCLUDE = "
+REDIS_INCLUDE = "include"
+REDIS_EXCLUDE = "exclude"
 REDIS_SET_DEFAULT_VAL = "None"
+REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
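The new `REDIS_INCLUDE` / `REDIS_EXCLUDE` constants feed the group inclusion/exclusion keys used by `AsyncRedisClient` in the next file: each group gets a `<group>:include` and a `<group>:exclude` Redis set. A small illustration of the key naming; the membership check here is deliberately simplified, since the client's actual logic lives in `check_inclusion_and_exclusion`:

```python
REDIS_INCLUDE = "include"
REDIS_EXCLUDE = "exclude"


def group_inclusion_key(group: str) -> str:
    return f"{group}:{REDIS_INCLUDE}"


def group_exclusion_key(group: str) -> str:
    return f"{group}:{REDIS_EXCLUDE}"


def is_allowed(member: str, include_set: set[str], exclude_set: set[str]) -> bool:
    # Illustrative only: treat exclusion as taking precedence over inclusion.
    if member in exclude_set:
        return False
    return member in include_set


assert group_inclusion_key("beta_agents") == "beta_agents:include"
assert is_allowed("agent-1", include_set={"agent-1"}, exclude_set=set())
```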
letta/data_sources/redis_client.py
CHANGED
@@ -2,12 +2,17 @@ import asyncio
 from functools import wraps
 from typing import Any, Optional, Set, Union
 
-import redis.asyncio as redis
-from redis import RedisError
-
 from letta.constants import REDIS_EXCLUDE, REDIS_INCLUDE, REDIS_SET_DEFAULT_VAL
 from letta.log import get_logger
 
+try:
+    from redis import RedisError
+    from redis.asyncio import ConnectionPool, Redis
+except ImportError:
+    RedisError = None
+    Redis = None
+    ConnectionPool = None
+
 logger = get_logger(__name__)
 
 _client_instance = None
@@ -44,7 +49,7 @@ class AsyncRedisClient:
             retry_on_timeout: Retry operations on timeout
             health_check_interval: Seconds between health checks
         """
-        self.pool =
+        self.pool = ConnectionPool(
             host=host,
             port=port,
             db=db,
@@ -59,12 +64,12 @@ class AsyncRedisClient:
         self._client = None
         self._lock = asyncio.Lock()
 
-    async def get_client(self) ->
+    async def get_client(self) -> Redis:
         """Get or create Redis client instance."""
         if self._client is None:
             async with self._lock:
                 if self._client is None:
-                    self._client =
+                    self._client = Redis(connection_pool=self.pool)
         return self._client
 
     async def close(self):
@@ -213,8 +218,8 @@ class AsyncRedisClient:
         return await client.decr(key)
 
     async def check_inclusion_and_exclusion(self, member: str, group: str) -> bool:
-        exclude_key =
-        include_key =
+        exclude_key = self._get_group_exclusion_key(group)
+        include_key = self._get_group_inclusion_key(group)
         # 1. if the member IS excluded from the group
         if self.exists(exclude_key) and await self.scard(exclude_key) > 1:
             return bool(await self.smismember(exclude_key, member))
@@ -231,14 +236,29 @@ class AsyncRedisClient:
 
     @staticmethod
     def _get_group_inclusion_key(group: str) -> str:
-        return f"{group}
+        return f"{group}:{REDIS_INCLUDE}"
 
     @staticmethod
     def _get_group_exclusion_key(group: str) -> str:
-        return f"{group}
+        return f"{group}:{REDIS_EXCLUDE}"
 
 
 class NoopAsyncRedisClient(AsyncRedisClient):
+    # noinspection PyMissingConstructor
+    def __init__(self):
+        pass
+
+    async def set(
+        self,
+        key: str,
+        value: Union[str, int, float],
+        ex: Optional[int] = None,
+        px: Optional[int] = None,
+        nx: bool = False,
+        xx: bool = False,
+    ) -> bool:
+        return False
+
     async def get(self, key: str, default: Any = None) -> Any:
         return default
 
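The redis changes combine two techniques: the dependency is imported inside a `try/except ImportError` so the package stays optional, and a `NoopAsyncRedisClient` supplies do-nothing implementations when Redis is unavailable or disabled. A condensed sketch of that fallback shape (the real `AsyncRedisClient` exposes many more operations than the two shown here):

```python
import asyncio
from typing import Any, Optional, Union

try:
    from redis.asyncio import ConnectionPool, Redis
except ImportError:  # redis is an optional dependency
    ConnectionPool = None
    Redis = None


class NoopAsyncRedisClient:
    """Stand-in used when redis is not installed or not configured."""

    async def set(self, key: str, value: Union[str, int, float], ex: Optional[int] = None) -> bool:
        return False  # report that nothing was cached

    async def get(self, key: str, default: Any = None) -> Any:
        return default  # always behave like a cache miss


async def demo() -> None:
    # Callers can use the same interface whether or not redis is importable.
    client = NoopAsyncRedisClient()
    await client.set("agent:last_seen", "now")
    print(await client.get("agent:last_seen", default="unknown"))  # -> "unknown"


asyncio.run(demo())
```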
letta/functions/function_sets/files.py
CHANGED
@@ -7,10 +7,10 @@ if TYPE_CHECKING:
 
 async def open_file(agent_state: "AgentState", file_name: str, view_range: Optional[Tuple[int, int]]) -> str:
     """
-    Open
+    Open the file with name `file_name` and load the contents into files section in core memory.
 
     Args:
-        file_name (str): Name of the file to view.
+        file_name (str): Name of the file to view. Required.
         view_range (Optional[Tuple[int, int]]): Optional tuple indicating range to view.
 
     Returns:
@@ -21,7 +21,7 @@ async def open_file(agent_state: "AgentState", file_name: str, view_range: Optio
 
 async def close_file(agent_state: "AgentState", file_name: str) -> str:
     """
-    Close
+    Close file with name `file_name` in files section in core memory.
 
     Args:
         file_name (str): Name of the file to close.
@@ -48,7 +48,7 @@ async def grep(agent_state: "AgentState", pattern: str, include: Optional[str] =
 
 async def search_files(agent_state: "AgentState", query: str) -> List["FileMetadata"]:
     """
-    Get list of most relevant files across all data sources.
+    Get list of most relevant files across all data sources using embedding search.
 
     Args:
         query (str): The search query.