letta-nightly 0.8.8.dev20250703104323__py3-none-any.whl → 0.8.8.dev20250703174903__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +1 -0
- letta/agents/base_agent.py +8 -2
- letta/agents/ephemeral_summary_agent.py +33 -33
- letta/agents/letta_agent.py +104 -53
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +8 -4
- letta/functions/function_sets/files.py +22 -7
- letta/functions/function_sets/multi_agent.py +34 -0
- letta/functions/types.py +1 -1
- letta/groups/helpers.py +8 -5
- letta/groups/sleeptime_multi_agent_v2.py +20 -15
- letta/interface.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +15 -8
- letta/interfaces/openai_chat_completions_streaming_interface.py +9 -6
- letta/interfaces/openai_streaming_interface.py +17 -11
- letta/llm_api/openai_client.py +2 -1
- letta/orm/agent.py +1 -0
- letta/orm/file.py +8 -2
- letta/orm/files_agents.py +36 -11
- letta/orm/mcp_server.py +3 -0
- letta/orm/source.py +2 -1
- letta/orm/step.py +3 -0
- letta/prompts/system/memgpt_v2_chat.txt +5 -8
- letta/schemas/agent.py +58 -23
- letta/schemas/embedding_config.py +3 -2
- letta/schemas/enums.py +4 -0
- letta/schemas/file.py +1 -0
- letta/schemas/letta_stop_reason.py +18 -0
- letta/schemas/mcp.py +15 -10
- letta/schemas/memory.py +35 -5
- letta/schemas/providers.py +11 -0
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +2 -1
- letta/server/rest_api/routers/v1/agents.py +320 -184
- letta/server/rest_api/routers/v1/groups.py +6 -2
- letta/server/rest_api/routers/v1/identities.py +6 -2
- letta/server/rest_api/routers/v1/jobs.py +49 -1
- letta/server/rest_api/routers/v1/sources.py +28 -19
- letta/server/rest_api/routers/v1/steps.py +7 -2
- letta/server/rest_api/routers/v1/tools.py +40 -9
- letta/server/rest_api/streaming_response.py +88 -0
- letta/server/server.py +61 -55
- letta/services/agent_manager.py +28 -16
- letta/services/file_manager.py +58 -9
- letta/services/file_processor/chunker/llama_index_chunker.py +2 -0
- letta/services/file_processor/embedder/openai_embedder.py +54 -10
- letta/services/file_processor/file_processor.py +59 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -0
- letta/services/files_agents_manager.py +120 -2
- letta/services/helpers/agent_manager_helper.py +21 -4
- letta/services/job_manager.py +57 -6
- letta/services/mcp/base_client.py +1 -0
- letta/services/mcp_manager.py +13 -1
- letta/services/step_manager.py +14 -5
- letta/services/summarizer/summarizer.py +6 -22
- letta/services/tool_executor/builtin_tool_executor.py +0 -1
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/multi_agent_tool_executor.py +23 -0
- letta/services/tool_manager.py +7 -7
- letta/settings.py +11 -2
- letta/templates/summary_request_text.j2 +19 -0
- letta/utils.py +95 -14
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/RECORD +68 -67
- /letta/{agents/prompts → prompts/system}/summary_system_prompt.txt +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
letta/agents/base_agent.py
CHANGED
@@ -67,7 +67,8 @@ class BaseAgent(ABC):
         """
         raise NotImplementedError
 
-
+    @staticmethod
+    def pre_process_input_message(input_messages: List[MessageCreate]) -> Any:
         """
         Pre-process function to run on the input_message.
         """
@@ -97,9 +98,13 @@ class BaseAgent(ABC):
         # [DB Call] loading blocks (modifies: agent_state.memory.blocks)
         await self.agent_manager.refresh_memory_async(agent_state=agent_state, actor=self.actor)
 
+        tool_constraint_block = None
+        if tool_rules_solver is not None:
+            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+
         # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
         curr_system_message = in_context_messages[0]
-        curr_memory_str = agent_state.memory.compile()
+        curr_memory_str = agent_state.memory.compile(tool_usage_rules=tool_constraint_block, sources=agent_state.sources)
         curr_system_message_text = curr_system_message.content[0].text
         if curr_memory_str in curr_system_message_text:
             logger.debug(
@@ -124,6 +129,7 @@ class BaseAgent(ABC):
             previous_message_count=num_messages - len(in_context_messages),
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
+            sources=agent_state.sources,
         )
 
         diff = united_diff(curr_system_message_text, new_system_message_str)
letta/agents/ephemeral_summary_agent.py
CHANGED
@@ -1,27 +1,28 @@
-from
-from typing import AsyncGenerator, Dict, List
-
-from openai import AsyncOpenAI
+from typing import AsyncGenerator, List
 
 from letta.agents.base_agent import BaseAgent
 from letta.constants import DEFAULT_MAX_STEPS
+from letta.helpers.message_helper import convert_message_creates_to_messages
+from letta.llm_api.llm_client import LLMClient
+from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.prompts.gpt_system import get_system_text
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
 from letta.services.message_manager import MessageManager
 
+logger = get_logger(__name__)
+
 
 class EphemeralSummaryAgent(BaseAgent):
     """
-    A stateless summarization agent
-
-    # TODO: Extend to more clients
+    A stateless summarization agent that utilizes the caller's LLM client to summarize the conversation.
+    TODO (cliandy): allow the summarizer to use another llm_config from the main agent maybe?
     """
 
     def __init__(
@@ -35,7 +36,7 @@ class EphemeralSummaryAgent(BaseAgent):
     ):
         super().__init__(
             agent_id=agent_id,
-            openai_client=
+            openai_client=None,
             message_manager=message_manager,
             agent_manager=agent_manager,
             actor=actor,
@@ -65,17 +66,33 @@ class EphemeralSummaryAgent(BaseAgent):
         input_message = input_messages[0]
         input_message.content[0].text += f"\n\n--- Previous Summary ---\n{block.value}\n"
 
-
-
+        # Gets the LLMCLient based on the calling agent's LLM Config
+        agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
+        llm_client = LLMClient.create(
+            provider_type=agent_state.llm_config.model_endpoint_type,
+            put_inner_thoughts_first=True,
+            actor=self.actor,
+        )
 
-
-
-
+        system_message_create = MessageCreate(
+            role=MessageRole.system,
+            content=[TextContent(text=get_system_text("summary_system_prompt"))],
+        )
+        messages = convert_message_creates_to_messages(
+            message_creates=[system_message_create] + input_messages,
+            agent_id=self.agent_id,
+            timezone=agent_state.timezone,
+        )
+
+        request_data = llm_client.build_request_data(messages, agent_state.llm_config, tools=[])
+        response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+        response = llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
+        summary = response.choices[0].message.content.strip()
 
         await self.block_manager.update_block_async(block_id=block.id, block_update=BlockUpdate(value=summary), actor=self.actor)
 
-
-
+        logger.debug("block:", block)
+        logger.debug("summary:", summary)
 
         return [
             Message(
@@ -84,22 +101,5 @@ class EphemeralSummaryAgent(BaseAgent):
             )
         ]
 
-    def _build_openai_request(self, openai_messages: List[Dict]) -> ChatCompletionRequest:
-        current_dir = Path(__file__).parent
-        file_path = current_dir / "prompts" / "summary_system_prompt.txt"
-        with open(file_path, "r") as file:
-            system = file.read()
-
-        system_message = [{"role": "system", "content": system}]
-
-        openai_request = ChatCompletionRequest(
-            model="gpt-4o",
-            messages=system_message + openai_messages,
-            user=self.actor.id,
-            max_completion_tokens=4096,
-            temperature=0.7,
-        )
-        return openai_request
-
     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
         raise NotImplementedError("EphemeralAgent does not support async step.")
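Note: pieced together, the hunks above replace the hard-coded OpenAI request in EphemeralSummaryAgent with a provider-agnostic flow through LLMClient. The sketch below re-assembles that flow using only names that appear in the diff (LLMClient.create, build_request_data, request_async, convert_response_to_chat_completion, convert_message_creates_to_messages, get_system_text); the standalone function wrapper and the pre-loaded agent_state, actor, and input_messages are illustrative assumptions, not part of the package.

# Illustrative sketch of the provider-agnostic summarization path introduced above.
# Assumes a Letta install plus an agent_state, actor, and input_messages already in
# scope, as they are inside EphemeralSummaryAgent.step.
from letta.helpers.message_helper import convert_message_creates_to_messages
from letta.llm_api.llm_client import LLMClient
from letta.prompts.gpt_system import get_system_text
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import MessageCreate


async def summarize(agent_state, actor, input_messages: list[MessageCreate]) -> str:
    # Pick the LLM client from the calling agent's config instead of a fixed OpenAI client
    llm_client = LLMClient.create(
        provider_type=agent_state.llm_config.model_endpoint_type,
        put_inner_thoughts_first=True,
        actor=actor,
    )
    system_message = MessageCreate(
        role=MessageRole.system,
        content=[TextContent(text=get_system_text("summary_system_prompt"))],
    )
    messages = convert_message_creates_to_messages(
        message_creates=[system_message] + input_messages,
        agent_id=agent_state.id,
        timezone=agent_state.timezone,
    )
    # Tool-free request: build the payload, send it, normalize to a chat completion
    request_data = llm_client.build_request_data(messages, agent_state.llm_config, tools=[])
    response_data = await llm_client.request_async(request_data, agent_state.llm_config)
    response = llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
    return response.choices[0].message.content.strip()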
letta/agents/letta_agent.py
CHANGED
@@ -1,8 +1,9 @@
 import asyncio
 import json
 import uuid
+from collections.abc import AsyncGenerator
 from datetime import datetime
-from typing import
+from typing import Optional
 
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
@@ -34,7 +35,7 @@ from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState, UpdateAgent
-from letta.schemas.enums import MessageRole, ProviderType
+from letta.schemas.enums import JobStatus, MessageRole, ProviderType
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
@@ -58,14 +59,17 @@ from letta.services.summarizer.enums import SummarizationMode
 from letta.services.summarizer.summarizer import Summarizer
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
-from letta.settings import model_settings
+from letta.settings import model_settings, settings, summarizer_settings
 from letta.system import package_function_response
 from letta.types import JsonDict
 from letta.utils import log_telemetry, validate_function_response
 
+logger = get_logger(__name__)
+
+DEFAULT_SUMMARY_BLOCK_LABEL = "conversation_summary"
 
-class LettaAgent(BaseAgent):
 
+class LettaAgent(BaseAgent):
     def __init__(
         self,
         agent_id: str,
@@ -77,11 +81,12 @@ class LettaAgent(BaseAgent):
         actor: User,
         step_manager: StepManager = NoopStepManager(),
         telemetry_manager: TelemetryManager = NoopTelemetryManager(),
-
-
-
-
-
+        current_run_id: str | None = None,
+        summary_block_label: str = DEFAULT_SUMMARY_BLOCK_LABEL,
+        message_buffer_limit: int = summarizer_settings.message_buffer_limit,
+        message_buffer_min: int = summarizer_settings.message_buffer_min,
+        enable_summarization: bool = summarizer_settings.enable_summarization,
+        max_summarization_retries: int = summarizer_settings.max_summarization_retries,
     ):
         super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)
 
@@ -92,7 +97,9 @@ class LettaAgent(BaseAgent):
         self.passage_manager = passage_manager
         self.step_manager = step_manager
         self.telemetry_manager = telemetry_manager
-        self.
+        self.job_manager = job_manager
+        self.current_run_id = current_run_id
+        self.response_messages: list[Message] = []
 
         self.last_function_response = None
 
@@ -117,23 +124,42 @@ class LettaAgent(BaseAgent):
         )
 
         self.summarizer = Summarizer(
-            mode=SummarizationMode.
+            mode=SummarizationMode(summarizer_settings.mode),
             summarizer_agent=self.summarization_agent,
             # TODO: Make this configurable
             message_buffer_limit=message_buffer_limit,
             message_buffer_min=message_buffer_min,
         )
 
+    async def _check_run_cancellation(self) -> bool:
+        """
+        Check if the current run associated with this agent execution has been cancelled.
+
+        Returns:
+            True if the run is cancelled, False otherwise (or if no run is associated)
+        """
+        if not self.job_manager or not self.current_run_id:
+            return False
+
+        try:
+            job = await self.job_manager.get_job_by_id_async(job_id=self.current_run_id, actor=self.actor)
+            return job.status == JobStatus.cancelled
+        except Exception as e:
+            # Log the error but don't fail the execution
+            logger.warning(f"Failed to check job cancellation status for job {self.current_run_id}: {e}")
+            return False
+
     @trace_method
     async def step(
         self,
-        input_messages:
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
-        run_id:
+        run_id: str | None = None,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns:
-        include_return_message_types:
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ) -> LettaResponse:
+        # TODO (cliandy): pass in run_id and use at send_message endpoints for all step functions
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
@@ -155,11 +181,11 @@ class LettaAgent(BaseAgent):
     @trace_method
     async def step_stream_no_tokens(
         self,
-        input_messages:
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns:
-        include_return_message_types:
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ):
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -182,6 +208,13 @@ class LettaAgent(BaseAgent):
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -313,11 +346,11 @@ class LettaAgent(BaseAgent):
     async def _step(
         self,
         agent_state: AgentState,
-        input_messages:
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
-        run_id:
-        request_start_timestamp_ns:
-    ) ->
+        run_id: str | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> tuple[list[Message], list[Message], LettaStopReason | None, LettaUsageStatistics]:
         """
         Carries out an invocation of the agent loop. In each step, the agent
         1. Rebuilds its memory
@@ -343,6 +376,12 @@ class LettaAgent(BaseAgent):
         stop_reason = None
         usage = LettaUsageStatistics()
         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -425,7 +464,7 @@ class LettaAgent(BaseAgent):
                 ),
             )
 
-            MetricRegistry().step_execution_time_ms_histogram.record(
+            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
 
             if not should_continue:
                 break
@@ -455,6 +494,8 @@ class LettaAgent(BaseAgent):
         return current_in_context_messages, new_in_context_messages, stop_reason, usage
 
     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+        if not settings.track_last_agent_run:
+            return
         try:
             await self.agent_manager.update_agent_async(
                 agent_id=self.agent_id,
@@ -467,11 +508,11 @@ class LettaAgent(BaseAgent):
     @trace_method
     async def step_stream(
         self,
-        input_messages:
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns:
-        include_return_message_types:
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ) -> AsyncGenerator[str, None]:
         """
         Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
@@ -503,6 +544,13 @@ class LettaAgent(BaseAgent):
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -543,7 +591,9 @@ class LettaAgent(BaseAgent):
                 raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
 
             async for chunk in interface.process(
-                stream,
+                stream,
+                ttft_span=request_span,
+                provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
             ):
                 # Measure time to first token
                 if first_chunk and request_span is not None:
@@ -653,7 +703,7 @@ class LettaAgent(BaseAgent):
                 yield f"data: {tool_return.model_dump_json()}\n\n"
 
             # TODO (cliandy): consolidate and expand with trace
-            MetricRegistry().step_execution_time_ms_histogram.record(
+            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
 
             if not should_continue:
                 break
@@ -686,13 +736,13 @@ class LettaAgent(BaseAgent):
     # noinspection PyInconsistentReturns
    async def _build_and_request_from_llm(
         self,
-        current_in_context_messages:
-        new_in_context_messages:
+        current_in_context_messages: list[Message],
+        new_in_context_messages: list[Message],
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
         agent_step_span: "Span",
-    ) ->
+    ) -> tuple[dict, dict, list[Message], list[Message], list[str]] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -738,12 +788,12 @@ class LettaAgent(BaseAgent):
         first_chunk: bool,
         ttft_span: "Span",
         request_start_timestamp_ns: int,
-        current_in_context_messages:
-        new_in_context_messages:
+        current_in_context_messages: list[Message],
+        new_in_context_messages: list[Message],
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-    ) ->
+    ) -> tuple[dict, AsyncStream[ChatCompletionChunk], list[Message], list[Message], list[str], int] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -795,11 +845,11 @@ class LettaAgent(BaseAgent):
         self,
         e: Exception,
         llm_client: LLMClientBase,
-        in_context_messages:
-        new_letta_messages:
+        in_context_messages: list[Message],
+        new_letta_messages: list[Message],
         llm_config: LLMConfig,
         force: bool,
-    ) ->
+    ) -> list[Message]:
         if isinstance(e, ContextWindowExceededError):
             return await self._rebuild_context_window(
                 in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
@@ -810,12 +860,12 @@ class LettaAgent(BaseAgent):
     @trace_method
     async def _rebuild_context_window(
         self,
-        in_context_messages:
-        new_letta_messages:
+        in_context_messages: list[Message],
+        new_letta_messages: list[Message],
         llm_config: LLMConfig,
-        total_tokens:
+        total_tokens: int | None = None,
         force: bool = False,
-    ) ->
+    ) -> list[Message]:
         # If total tokens is reached, we truncate down
         # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
         if force or (total_tokens and total_tokens > llm_config.context_window):
@@ -851,10 +901,10 @@ class LettaAgent(BaseAgent):
     async def _create_llm_request_data_async(
         self,
         llm_client: LLMClientBase,
-        in_context_messages:
+        in_context_messages: list[Message],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
-    ) ->
+    ) -> tuple[dict, list[str]]:
         self.num_messages, self.num_archival_memories = await asyncio.gather(
             (
                 self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
@@ -925,18 +975,18 @@ class LettaAgent(BaseAgent):
     async def _handle_ai_response(
         self,
         tool_call: ToolCall,
-        valid_tool_names:
+        valid_tool_names: list[str],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
         usage: UsageStatistics,
-        reasoning_content:
-        pre_computed_assistant_message_id:
+        reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
+        pre_computed_assistant_message_id: str | None = None,
         step_id: str | None = None,
-        initial_messages:
+        initial_messages: list[Message] | None = None,
         agent_step_span: Optional["Span"] = None,
-        is_final_step:
-        run_id:
-    ) ->
+        is_final_step: bool | None = None,
+        run_id: str | None = None,
+    ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
         Handle the final AI response once streaming completes, execute / validate the
         tool call, decide whether we should keep stepping, and persist state.
@@ -1012,8 +1062,9 @@ class LettaAgent(BaseAgent):
             context_window_limit=agent_state.llm_config.context_window,
             usage=usage,
             provider_id=None,
-            job_id=run_id,
+            job_id=run_id if run_id else self.current_run_id,
             step_id=step_id,
+            project_id=agent_state.project_id,
         )
 
         tool_call_messages = create_letta_messages_from_llm_response(
@@ -1150,7 +1201,7 @@ class LettaAgent(BaseAgent):
                 name="tool_execution_completed",
                 attributes={
                     "tool_name": target_tool.name,
-                    "duration_ms": ns_to_ms(
+                    "duration_ms": ns_to_ms(end_time - start_time),
                     "success": tool_execution_result.success_flag,
                     "tool_type": target_tool.tool_type,
                     "tool_id": target_tool.id,
@@ -1160,7 +1211,7 @@ class LettaAgent(BaseAgent):
         return tool_execution_result
 
     @trace_method
-    def _load_last_function_response(self, in_context_messages:
+    def _load_last_function_response(self, in_context_messages: list[Message]):
         """Load the last function response from message history"""
         for msg in reversed(in_context_messages):
             if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
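Note: every cancellation hunk in letta_agent.py follows the same pattern: look up the run's job, compare its status to JobStatus.cancelled, and exit the step loop with a cancelled stop reason. A condensed sketch of that pattern follows; the free-standing function and its parameter list are illustrative assumptions, while the manager call and enum come straight from the hunks above.

# Condensed form of the per-step cancellation check added above (illustrative sketch).
# job_manager, current_run_id, and actor are assumed to be wired up as in LettaAgent.__init__.
from letta.schemas.enums import JobStatus


async def run_is_cancelled(job_manager, current_run_id, actor) -> bool:
    if not job_manager or not current_run_id:
        return False
    try:
        job = await job_manager.get_job_by_id_async(job_id=current_run_id, actor=actor)
        return job.status == JobStatus.cancelled
    except Exception:
        # A failed status lookup should never abort the agent loop
        return False

# Each step loop then emits an SSE stop event and breaks, as in the diff:
#     if await self._check_run_cancellation():
#         stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
#         yield f"data: {stop_reason.model_dump_json()}\n\n"
#         break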
letta/agents/voice_agent.py
CHANGED
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
             timezone=agent_state.timezone,
             previous_message_count=self.num_messages,
             archival_memory_size=self.num_archival_memories,
+            sources=agent_state.sources,
         )
         letta_message_db_queue = create_input_messages(
             input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
@@ -366,7 +367,7 @@ class VoiceAgent(BaseAgent):
                 "description": (
                     "Look in long-term or earlier-conversation memory **only when** the "
                     "user asks about something missing from the visible context. "
-                    "The user
+                    "The user's latest utterance is sent automatically as the main query.\n\n"
                     "Optional refinements (set unused fields to *null*):\n"
                     "• `convo_keyword_queries` – extra names/IDs if the request is vague.\n"
                     "• `start_minutes_ago` / `end_minutes_ago` – limit results to a recent time window."
letta/constants.py
CHANGED
@@ -83,7 +83,7 @@ SEND_MESSAGE_TOOL_NAME = "send_message"
 # Base tools that cannot be edited, as they access agent state directly
 # Note that we don't include "conversation_search_date" for now
 BASE_TOOLS = [SEND_MESSAGE_TOOL_NAME, "conversation_search", "archival_memory_insert", "archival_memory_search"]
-
+DEPRECATED_LETTA_TOOLS = ["archival_memory_insert", "archival_memory_search"]
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # New v2 collection of the base memory tools (effecitvely same as sleeptime set), to pair with memgpt_v2 prompt
@@ -115,7 +115,8 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
     "finish_rethinking_memory",
 ]
 # Multi agent tools
-MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags"]
+MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
+LOCAL_ONLY_MULTI_AGENT_TOOLS = ["send_message_to_agent_async"]
 
 # Used to catch if line numbers are pushed in
 # MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(r"^Line \d+: ", re.MULTILINE)
@@ -130,7 +131,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
 BUILTIN_TOOLS = ["run_code", "web_search"]
 
 # Built in tools
-FILES_TOOLS = ["open_files", "grep_files", "
+FILES_TOOLS = ["open_files", "grep_files", "semantic_search_files"]
 
 FILE_MEMORY_EXISTS_MESSAGE = "The following files are currently accessible in memory:"
 FILE_MEMORY_EMPTY_MESSAGE = (
@@ -325,7 +326,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
 CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
 CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
 CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
-CORE_MEMORY_SOURCE_CHAR_LIMIT: int =
+CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
 # Function return limits
 FUNCTION_RETURN_CHAR_LIMIT = 6000 # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000 # very high (we rely on implementation)
@@ -357,6 +358,9 @@ REDIS_INCLUDE = "include"
 REDIS_EXCLUDE = "exclude"
 REDIS_SET_DEFAULT_VAL = "None"
 REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
+REDIS_RUN_ID_PREFIX = "agent:send_message:run_id"
 
 # TODO: This is temporary, eventually use token-based eviction
 MAX_FILES_OPEN = 5
+
+GET_PROVIDERS_TIMEOUT_SECONDS = 10
letta/functions/function_sets/files.py
CHANGED
@@ -10,15 +10,20 @@ if TYPE_CHECKING:
 async def open_files(agent_state: "AgentState", file_requests: List[FileOpenRequest], close_all_others: bool = False) -> str:
     """Open one or more files and load their contents into files section in core memory. Maximum of 5 files can be opened simultaneously.
 
+    Use this when you want to:
+    - Inspect or reference file contents during reasoning
+    - View specific portions of large files (e.g. functions or definitions)
+    - Replace currently open files with a new set for focused context (via `close_all_others=True`)
+
     Examples:
-        Open single file (entire content):
-            file_requests = [FileOpenRequest(file_name="config.py")]
+        Open single file belonging to a directory named `project_utils` (entire content):
+            file_requests = [FileOpenRequest(file_name="project_utils/config.py")]
 
         Open multiple files with different view ranges:
             file_requests = [
-                FileOpenRequest(file_name="config.py", offset=1, length=50),  # Lines 1-50
-                FileOpenRequest(file_name="main.py", offset=100, length=100),  # Lines 100-199
-                FileOpenRequest(file_name="utils.py")  # Entire file
+                FileOpenRequest(file_name="project_utils/config.py", offset=1, length=50),  # Lines 1-50
+                FileOpenRequest(file_name="project_utils/main.py", offset=100, length=100),  # Lines 100-199
+                FileOpenRequest(file_name="project_utils/utils.py")  # Entire file
             ]
 
         Close all other files and open new ones:
@@ -43,6 +48,11 @@ async def grep_files(
     """
     Grep tool to search files across data sources using a keyword or regex pattern.
 
+    Use this when you want to:
+    - Quickly find occurrences of a variable, function, or keyword
+    - Locate log messages, error codes, or TODOs across files
+    - Understand surrounding code by including `context_lines`
+
     Args:
         pattern (str): Keyword or regex pattern to search within file contents.
         include (Optional[str]): Optional keyword or regex pattern to filter filenames to include in the search.
@@ -55,9 +65,14 @@ async def grep_files(
     raise NotImplementedError("Tool not implemented. Please contact the Letta team.")
 
 
-async def
+async def semantic_search_files(agent_state: "AgentState", query: str) -> List["FileMetadata"]:
     """
-    Get list of most relevant
+    Get list of most relevant chunks from any file using vector/embedding search.
+
+    Use this when you want to:
+    - Find related content that without using exact keywords (e.g., conceptually similar sections)
+    - Look up high-level descriptions, documentation, or config patterns
+    - Perform fuzzy search when grep isn't sufficient
 
     Args:
         query (str): The search query.