letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250613104250__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +11 -1
- letta/agents/base_agent.py +11 -4
- letta/agents/ephemeral_summary_agent.py +3 -2
- letta/agents/letta_agent.py +109 -78
- letta/agents/letta_agent_batch.py +4 -3
- letta/agents/voice_agent.py +3 -3
- letta/agents/voice_sleeptime_agent.py +3 -2
- letta/client/client.py +6 -3
- letta/constants.py +6 -0
- letta/data_sources/connectors.py +3 -5
- letta/functions/async_composio_toolset.py +4 -1
- letta/functions/function_sets/files.py +4 -3
- letta/functions/schema_generator.py +5 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -3
- letta/helpers/converters.py +7 -1
- letta/helpers/message_helper.py +31 -11
- letta/helpers/tool_rule_solver.py +69 -4
- letta/interfaces/anthropic_streaming_interface.py +8 -1
- letta/interfaces/openai_streaming_interface.py +4 -1
- letta/llm_api/anthropic_client.py +4 -4
- letta/llm_api/openai_client.py +56 -11
- letta/local_llm/utils.py +3 -20
- letta/orm/sqlalchemy_base.py +7 -1
- letta/otel/metric_registry.py +26 -0
- letta/otel/metrics.py +78 -14
- letta/schemas/letta_message_content.py +64 -3
- letta/schemas/letta_request.py +5 -1
- letta/schemas/message.py +61 -14
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/providers.py +41 -14
- letta/schemas/tool_rule.py +67 -0
- letta/schemas/user.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +22 -12
- letta/server/rest_api/routers/v1/sources.py +13 -25
- letta/server/server.py +10 -5
- letta/services/agent_manager.py +5 -1
- letta/services/file_manager.py +219 -0
- letta/services/file_processor/chunker/line_chunker.py +119 -14
- letta/services/file_processor/file_processor.py +8 -8
- letta/services/file_processor/file_types.py +303 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -11
- letta/services/helpers/agent_manager_helper.py +6 -0
- letta/services/message_manager.py +32 -0
- letta/services/organization_manager.py +4 -6
- letta/services/passage_manager.py +1 -0
- letta/services/source_manager.py +0 -208
- letta/services/tool_executor/composio_tool_executor.py +5 -1
- letta/services/tool_executor/files_tool_executor.py +291 -15
- letta/services/user_manager.py +8 -8
- letta/system.py +3 -1
- letta/utils.py +7 -13
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/RECORD +57 -55
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -46,7 +46,7 @@ from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_
 from letta.schemas.block import BlockUpdate
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole, ProviderType
-from letta.schemas.letta_message_content import TextContent
+from letta.schemas.letta_message_content import ImageContent, TextContent
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, MessageCreate, ToolReturn
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
@@ -369,6 +369,16 @@ class Agent(BaseAgent):
             )
         else:
             # Fallback to existing flow
+            for message in message_sequence:
+                if isinstance(message.content, list):
+
+                    def get_fallback_text_content(content):
+                        if isinstance(content, ImageContent):
+                            return TextContent(text="[Image Here]")
+                        return content
+
+                    message.content = [get_fallback_text_content(content) for content in message.content]
+
             response = create(
                 llm_config=self.agent_state.llm_config,
                 messages=message_sequence,
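The only behavioral change in this file is the image fallback: before the non-streaming `create(...)` call, any `ImageContent` part in a message's content list is replaced with a `"[Image Here]"` text placeholder, so providers without image support still receive valid text-only input. A standalone restatement of the same transformation (the two content classes here are simplified stand-ins for the real `letta.schemas.letta_message_content` models):

```python
from dataclasses import dataclass


@dataclass
class TextContent:
    text: str


@dataclass
class ImageContent:
    url: str


def get_fallback_text_content(content):
    # Mirrors the diff: image parts become a "[Image Here]" text marker;
    # every other content part passes through unchanged.
    if isinstance(content, ImageContent):
        return TextContent(text="[Image Here]")
    return content


parts = [TextContent(text="What is in this picture?"), ImageContent(url="https://example.com/cat.png")]
parts = [get_fallback_text_content(c) for c in parts]
assert parts == [TextContent(text="What is in this picture?"), TextContent(text="[Image Here]")]
```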
letta/agents/base_agent.py
CHANGED
@@ -3,6 +3,8 @@ from typing import Any, AsyncGenerator, List, Optional, Union
 
 import openai
 
+from letta.constants import DEFAULT_MAX_STEPS
+from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.log import get_logger
 from letta.schemas.agent import AgentState
@@ -15,6 +17,7 @@ from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.helpers.agent_manager_helper import compile_system_message
 from letta.services.message_manager import MessageManager
+from letta.services.passage_manager import PassageManager
 from letta.utils import united_diff
 
 logger = get_logger(__name__)
@@ -39,11 +42,13 @@ class BaseAgent(ABC):
         self.openai_client = openai_client
         self.message_manager = message_manager
         self.agent_manager = agent_manager
+        # TODO: Pass this in
+        self.passage_manager = PassageManager()
         self.actor = actor
         self.logger = get_logger(agent_id)
 
     @abstractmethod
-    async def step(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
         """
         Main execution loop for the agent.
         """
@@ -51,7 +56,7 @@ class BaseAgent(ABC):
 
     @abstractmethod
     async def step_stream(
-        self, input_messages: List[MessageCreate], max_steps: int =
+        self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS
     ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         """
         Main streaming execution loop for the agent.
@@ -77,8 +82,9 @@
         self,
         in_context_messages: List[Message],
         agent_state: AgentState,
-
-
+        tool_rules_solver: Optional[ToolRulesSolver] = None,
+        num_messages: Optional[int] = None,  # storing these calculations is specific to the voice agent
+        num_archival_memories: Optional[int] = None,
     ) -> List[Message]:
         """
         Async version of function above. For now before breaking up components, changes should be made in both places.
@@ -112,6 +118,7 @@
             in_context_memory_last_edit=memory_edit_timestamp,
             previous_message_count=num_messages,
             archival_memory_size=num_archival_memories,
+            tool_rules_solver=tool_rules_solver,
         )
 
         diff = united_diff(curr_system_message_text, new_system_message_str)
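Two themes from this file recur across the rest of the release: every `max_steps` default now comes from the shared `DEFAULT_MAX_STEPS` constant instead of a per-signature literal, and `_rebuild_memory_async` threads an optional `ToolRulesSolver` through to `compile_system_message`. A minimal sketch of the constant pattern; the value 50 is an assumption for illustration only, since the diff never shows the real value from `letta/constants.py`:

```python
# Assumed value for illustration; the real constant lives in letta/constants.py.
DEFAULT_MAX_STEPS = 50


class ExampleAgent:
    async def step(self, input_messages: list, max_steps: int = DEFAULT_MAX_STEPS) -> None:
        # Every agent loop is now bounded by the same shared constant, and the
        # loop index feeds the new is_final_step flag used in tool handling.
        for i in range(max_steps):
            is_final_step = i == max_steps - 1
            if is_final_step:
                break  # the real loops force request_heartbeat=False here
```

The same substitution appears in ephemeral_summary_agent.py, letta_agent.py, letta_agent_batch.py, voice_agent.py, and voice_sleeptime_agent.py below.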
letta/agents/ephemeral_summary_agent.py
CHANGED
@@ -4,6 +4,7 @@ from typing import AsyncGenerator, Dict, List
 from openai import AsyncOpenAI
 
 from letta.agents.base_agent import BaseAgent
+from letta.constants import DEFAULT_MAX_STEPS
 from letta.orm.errors import NoResultFound
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.enums import MessageRole
@@ -42,7 +43,7 @@ class EphemeralSummaryAgent(BaseAgent):
         self.target_block_label = target_block_label
         self.block_manager = block_manager
 
-    async def step(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> List[Message]:
         if len(input_messages) > 1:
             raise ValueError("Can only invoke EphemeralSummaryAgent with a single summarization message.")
 
@@ -100,5 +101,5 @@
         )
         return openai_request
 
-    async def step_stream(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
         raise NotImplementedError("EphemeralAgent does not support async step.")
letta/agents/letta_agent.py
CHANGED
@@ -8,12 +8,8 @@ from openai.types.chat import ChatCompletionChunk
 
 from letta.agents.base_agent import BaseAgent
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
-from letta.agents.helpers import
-
-    _prepare_in_context_messages_async,
-    _prepare_in_context_messages_no_persist_async,
-    generate_step_id,
-)
+from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_no_persist_async, generate_step_id
+from letta.constants import DEFAULT_MAX_STEPS
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
@@ -37,6 +33,7 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_letta_messages_from_llm_response
@@ -119,7 +116,7 @@ class LettaAgent(BaseAgent):
     async def step(
         self,
         input_messages: List[MessageCreate],
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
         include_return_message_types: Optional[List[MessageType]] = None,
@@ -144,7 +141,7 @@
     async def step_stream_no_tokens(
         self,
         input_messages: List[MessageCreate],
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
         include_return_message_types: Optional[List[MessageType]] = None,
@@ -152,9 +149,10 @@
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
-        current_in_context_messages, new_in_context_messages = await
+        current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
+        initial_messages = new_in_context_messages
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
             provider_type=agent_state.llm_config.model_endpoint_type,
@@ -167,18 +165,20 @@
         request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
-        for
+        for i in range(max_steps):
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
             agent_step_span.set_attributes({"step_id": step_id})
 
-            request_data, response_data, current_in_context_messages, new_in_context_messages =
-
-
-
-
-
+            request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                await self._build_and_request_from_llm(
+                    current_in_context_messages,
+                    new_in_context_messages,
+                    agent_state,
+                    llm_client,
+                    tool_rules_solver,
+                )
             )
             in_context_messages = current_in_context_messages + new_in_context_messages
 
@@ -223,14 +223,18 @@
 
             persisted_messages, should_continue = await self._handle_ai_response(
                 tool_call,
+                valid_tool_names,
                 agent_state,
                 tool_rules_solver,
                 response.usage,
                 reasoning_content=reasoning,
+                initial_messages=initial_messages,
                 agent_step_span=agent_step_span,
+                is_final_step=(i == max_steps - 1),
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
+            initial_messages = None
             log_event("agent.stream_no_tokens.llm_response.processed")  # [4^]
 
             # log step time
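Both non-streaming loops now adopt the deferred-persistence strategy that `step_stream` previously implemented with a `persisted_input_messages` flag: the unpersisted input batch rides along in `initial_messages`, is handed to `_handle_ai_response` (which writes `(initial_messages or []) + tool_call_messages` in a single call, as the final hunk of this file shows), and is then cleared so later steps persist only their own output. A minimal sketch of that control flow, with the storage layer stubbed out and all letta types replaced by plain lists:

```python
import asyncio


async def run_steps(initial_messages, max_steps, handle_step, persist):
    # initial_messages stays unpersisted until the first step produces output;
    # both batches are then written with a single persist call.
    for i in range(max_steps):
        step_messages = await handle_step(is_final_step=(i == max_steps - 1))
        await persist((initial_messages or []) + step_messages)
        initial_messages = None  # later steps persist only their own output


async def demo():
    writes = []

    async def handle_step(is_final_step):
        return ["tool_call_message"]

    async def persist(batch):
        writes.append(batch)

    await run_steps(["user_message"], 2, handle_step, persist)
    print(writes)  # [['user_message', 'tool_call_message'], ['tool_call_message']]


asyncio.run(demo())
```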
@@ -291,7 +295,7 @@ class LettaAgent(BaseAgent):
         self,
         agent_state: AgentState,
         input_messages: List[MessageCreate],
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
         request_start_timestamp_ns: Optional[int] = None,
     ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
         """
@@ -301,9 +305,10 @@
         3. Fetches a response from the LLM
         4. Processes the response
         """
-        current_in_context_messages, new_in_context_messages = await
+        current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
+        initial_messages = new_in_context_messages
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
             provider_type=agent_state.llm_config.model_endpoint_type,
@@ -316,14 +321,16 @@
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         usage = LettaUsageStatistics()
-        for
+        for i in range(max_steps):
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
             agent_step_span.set_attributes({"step_id": step_id})
 
-            request_data, response_data, current_in_context_messages, new_in_context_messages =
-
+            request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                await self._build_and_request_from_llm(
+                    current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
+                )
             )
             in_context_messages = current_in_context_messages + new_in_context_messages
 
@@ -362,15 +369,19 @@
 
             persisted_messages, should_continue = await self._handle_ai_response(
                 tool_call,
+                valid_tool_names,
                 agent_state,
                 tool_rules_solver,
                 response.usage,
                 reasoning_content=reasoning,
                 step_id=step_id,
+                initial_messages=initial_messages,
                 agent_step_span=agent_step_span,
+                is_final_step=(i == max_steps - 1),
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
+            initial_messages = None
             log_event("agent.step.llm_response.processed")  # [4^]
 
             # log step time
@@ -416,7 +427,7 @@
     async def step_stream(
         self,
         input_messages: List[MessageCreate],
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
         request_start_timestamp_ns: Optional[int] = None,
         include_return_message_types: Optional[List[MessageType]] = None,
@@ -435,10 +446,6 @@
         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
-
-        # Special strategy to lower TTFT
-        # Delay persistence of the initial input message as much as possible
-        persisted_input_messages = False
         initial_messages = new_in_context_messages
 
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
@@ -454,21 +461,23 @@
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         provider_request_start_timestamp_ns = None
-        for
+        for i in range(max_steps):
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
             agent_step_span.set_attributes({"step_id": step_id})
 
-            request_data, stream, current_in_context_messages, new_in_context_messages =
-
-
-
-
-
-
-
-
+            request_data, stream, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                await self._build_and_request_from_llm_streaming(
+                    first_chunk,
+                    agent_step_span,
+                    request_start_timestamp_ns,
+                    current_in_context_messages,
+                    new_in_context_messages,
+                    agent_state,
+                    llm_client,
+                    tool_rules_solver,
+                )
             )
             log_event("agent.stream.llm_response.received")  # [3^]
 
@@ -513,12 +522,6 @@
                 interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
             )
 
-            # Persist input messages if not already
-            # Special strategy to lower TTFT
-            if not persisted_input_messages:
-                await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
-                persisted_input_messages = True
-
             # log LLM request time
             now = get_utc_timestamp_ns()
             llm_request_ns = now - step_start
@@ -529,6 +532,7 @@
             reasoning_content = interface.get_reasoning_content()
             persisted_messages, should_continue = await self._handle_ai_response(
                 tool_call,
+                valid_tool_names,
                 agent_state,
                 tool_rules_solver,
                 UsageStatistics(
@@ -539,10 +543,13 @@
                 reasoning_content=reasoning_content,
                 pre_computed_assistant_message_id=interface.letta_message_id,
                 step_id=step_id,
+                initial_messages=initial_messages,
                 agent_step_span=agent_step_span,
+                is_final_step=(i == max_steps - 1),
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
+            initial_messages = None
 
             # log total step time
             now = get_utc_timestamp_ns()
@@ -614,12 +621,12 @@
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-    ) -> Tuple[Dict, Dict, List[Message], List[Message]]:
+    ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]]:
         for attempt in range(self.max_summarization_retries + 1):
            try:
                log_event("agent.stream_no_tokens.messages.refreshed")
                # Create LLM request data
-                request_data = await self._create_llm_request_data_async(
+                request_data, valid_tool_names = await self._create_llm_request_data_async(
                    llm_client=llm_client,
                    in_context_messages=current_in_context_messages + new_in_context_messages,
                    agent_state=agent_state,
@@ -634,12 +641,7 @@
                     dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
                 )
                 # Attempt LLM request
-                return (
-                    request_data,
-                    response,
-                    current_in_context_messages,
-                    new_in_context_messages,
-                )
+                return (request_data, response, current_in_context_messages, new_in_context_messages, valid_tool_names)
 
             except Exception as e:
                 if attempt == self.max_summarization_retries:
@@ -667,12 +669,12 @@
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message]]:
+    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str]]:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
                 # Create LLM request data
-                request_data = await self._create_llm_request_data_async(
+                request_data, valid_tool_names = await self._create_llm_request_data_async(
                     llm_client=llm_client,
                     in_context_messages=current_in_context_messages + new_in_context_messages,
                     agent_state=agent_state,
@@ -691,6 +693,7 @@
                     await llm_client.stream_async(request_data, agent_state.llm_config),
                     current_in_context_messages,
                     new_in_context_messages,
+                    valid_tool_names,
                 )
 
             except Exception as e:
@@ -773,7 +776,7 @@
         in_context_messages: List[Message],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
-    ) -> dict:
+    ) -> Tuple[dict, List[str]]:
         self.num_messages, self.num_archival_memories = await asyncio.gather(
             (
                 self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
@@ -787,7 +790,11 @@
             ),
         )
         in_context_messages = await self._rebuild_memory_async(
-            in_context_messages,
+            in_context_messages,
+            agent_state,
+            num_messages=self.num_messages,
+            num_archival_memories=self.num_archival_memories,
+            tool_rules_solver=tool_rules_solver,
         )
 
         tools = [
@@ -809,8 +816,7 @@
         ]
 
         # Mirror the sync agent loop: get allowed tools or allow all if none are allowed
-
-        self.last_function_response = self._load_last_function_response(in_context_messages)
+        self.last_function_response = self._load_last_function_response(in_context_messages)
         valid_tool_names = tool_rules_solver.get_allowed_tool_names(
             available_tools=set([t.name for t in tools]),
             last_function_response=self.last_function_response,
@@ -827,28 +833,40 @@
             tool_list=allowed_tools, response_format=agent_state.response_format, request_heartbeat=True
         )
 
-        return
+        return (
+            llm_client.build_request_data(
+                in_context_messages,
+                agent_state.llm_config,
+                allowed_tools,
+                force_tool_call,
+            ),
+            valid_tool_names,
+        )
 
     @trace_method
     async def _handle_ai_response(
         self,
         tool_call: ToolCall,
+        valid_tool_names: List[str],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
         usage: UsageStatistics,
         reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
         pre_computed_assistant_message_id: Optional[str] = None,
         step_id: str | None = None,
-
+        initial_messages: Optional[List[Message]] = None,
         agent_step_span: Optional["Span"] = None,
+        is_final_step: Optional[bool] = None,
     ) -> Tuple[List[Message], bool]:
         """
         Now that streaming is done, handle the final AI response.
         This might yield additional SSE tokens if we do stalling.
        At the end, set self._continue_execution accordingly.
         """
+        # Check if the called tool is allowed by tool name:
         tool_call_name = tool_call.function.name
         tool_call_args_str = tool_call.function.arguments
+
         # Temp hack to gracefully handle parallel tool calling attempt, only take first one
         if "}{" in tool_call_args_str:
             tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
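The "temp hack" kept at the end of the hunk above guards against a model emitting two concatenated JSON argument objects in a parallel tool-call attempt; only the first object is retained. The string surgery in isolation (note the heuristic would also fire on a literal `}{` inside a string value):

```python
tool_call_args_str = '{"city": "Berlin"}{"city": "Paris"}'
if "}{" in tool_call_args_str:
    # Keep only the first JSON object of the concatenated pair.
    tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
print(tool_call_args_str)  # {"city": "Berlin"}
```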
@@ -861,17 +879,21 @@
             except AssertionError:
                 tool_args = json.loads(tool_args)
 
-
-
-
-
-
-
-
-
-
-
-
+        if is_final_step:
+            logger.info("Agent has reached max steps.")
+            request_heartbeat = False
+        else:
+            # Get request heartbeats and coerce to bool
+            request_heartbeat = tool_args.pop("request_heartbeat", False)
+            # Pre-emptively pop out inner_thoughts
+            tool_args.pop(INNER_THOUGHTS_KWARG, "")
+
+            # So this is necessary, because sometimes non-structured outputs makes mistakes
+            if not isinstance(request_heartbeat, bool):
+                if isinstance(request_heartbeat, str):
+                    request_heartbeat = request_heartbeat.lower() == "true"
+                else:
+                    request_heartbeat = bool(request_heartbeat)
 
         tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
 
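The replacement block above reads more easily pulled out into a function: on the final step the heartbeat is forced off so the loop terminates, and otherwise the flag is popped from the tool arguments and defensively coerced, because non-structured outputs sometimes emit it as a `"true"`/`"false"` string. A restated sketch (it omits the `INNER_THOUGHTS_KWARG` pop, and `coerce_request_heartbeat` is a name invented here, not a letta API):

```python
def coerce_request_heartbeat(tool_args: dict, is_final_step: bool) -> bool:
    # On the final step the loop must stop, so a heartbeat is never honored.
    if is_final_step:
        return False
    request_heartbeat = tool_args.pop("request_heartbeat", False)
    if isinstance(request_heartbeat, bool):
        return request_heartbeat
    if isinstance(request_heartbeat, str):
        # Non-structured outputs sometimes emit the flag as a string.
        return request_heartbeat.lower() == "true"
    return bool(request_heartbeat)


assert coerce_request_heartbeat({"request_heartbeat": "True"}, is_final_step=False) is True
assert coerce_request_heartbeat({"request_heartbeat": True}, is_final_step=True) is False
```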
@@ -883,14 +905,21 @@
             tool_call_id=tool_call_id,
             request_heartbeat=request_heartbeat,
         )
-
-
-
-
-
-
-
-
+        if tool_call_name not in valid_tool_names:
+            base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
+            violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
+            if violated_rule_messages:
+                bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
+                base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
+            tool_execution_result = ToolExecutionResult(status="error", func_return=base_error_message)
+        else:
+            tool_execution_result = await self._execute_tool(
+                tool_name=tool_call_name,
+                tool_args=tool_args,
+                agent_state=agent_state,
+                agent_step_span=agent_step_span,
+                step_id=step_id,
+            )
         log_telemetry(
             self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
         )
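When the model names a tool outside `valid_tool_names`, nothing executes: `_handle_ai_response` fabricates an error-status `ToolExecutionResult` whose message can carry hints from the solver's new `guess_rule_violation` helper (part of this release's `letta/helpers/tool_rule_solver.py` changes). The message construction restated standalone, with the rule text in the example invented for illustration:

```python
def build_tool_constraint_error(tool_call_name, valid_tool_names, violated_rule_messages):
    # Base error, plus bullet-point hints when the solver can guess which
    # tool rules the call violated.
    base_error_message = (
        f"[ToolConstraintError] Cannot call {tool_call_name}, "
        f"valid tools to call include: {valid_tool_names}."
    )
    if violated_rule_messages:
        bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
        base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
    return base_error_message


print(
    build_tool_constraint_error(
        "send_message",
        ["archival_memory_search"],
        ["send_message can only run after archival_memory_search"],  # invented rule text
    )
)
```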
@@ -960,7 +989,9 @@
             step_id=logged_step.id if logged_step else None,  # TODO (cliandy): eventually move over other agent loops
         )
 
-        persisted_messages = await self.message_manager.create_many_messages_async(
+        persisted_messages = await self.message_manager.create_many_messages_async(
+            (initial_messages or []) + tool_call_messages, actor=self.actor
+        )
         self.last_function_response = function_response
 
         return persisted_messages, continue_stepping
letta/agents/letta_agent_batch.py
CHANGED
@@ -8,6 +8,7 @@ from anthropic.types.beta.messages import BetaMessageBatchCanceledResult, BetaMe
 
 from letta.agents.base_agent import BaseAgent
 from letta.agents.helpers import _prepare_in_context_messages_async
+from letta.constants import DEFAULT_MAX_STEPS
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import enable_strict_mode
@@ -110,7 +111,7 @@ class LettaAgentBatch(BaseAgent):
         sandbox_config_manager: SandboxConfigManager,
         job_manager: JobManager,
         actor: User,
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
     ):
         self.message_manager = message_manager
         self.agent_manager = agent_manager
@@ -619,10 +620,10 @@
         return in_context_messages
 
     # Not used in batch.
-    async def step(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
         raise NotImplementedError
 
     async def step_stream(
-        self, input_messages: List[MessageCreate], max_steps: int =
+        self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS
     ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         raise NotImplementedError
letta/agents/voice_agent.py
CHANGED
@@ -9,7 +9,7 @@ import openai
 from letta.agents.base_agent import BaseAgent
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
-from letta.constants import NON_USER_MSG_PREFIX
+from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import (
     add_pre_execution_message,
@@ -111,10 +111,10 @@ class VoiceAgent(BaseAgent):
 
         return summarizer
 
-    async def step(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
         raise NotImplementedError("VoiceAgent does not have a synchronous step implemented currently.")
 
-    async def step_stream(self, input_messages: List[MessageCreate], max_steps: int =
+    async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
         """
         Main streaming loop that yields partial tokens.
         Whenever we detect a tool call, we yield from _handle_ai_response as well.
letta/agents/voice_sleeptime_agent.py
CHANGED
@@ -2,6 +2,7 @@ from typing import AsyncGenerator, List, Optional, Tuple, Union
 
 from letta.agents.helpers import _create_letta_response, serialize_message_history
 from letta.agents.letta_agent import LettaAgent
+from letta.constants import DEFAULT_MAX_STEPS
 from letta.orm.enums import ToolType
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
@@ -62,7 +63,7 @@ class VoiceSleeptimeAgent(LettaAgent):
     async def step(
         self,
         input_messages: List[MessageCreate],
-        max_steps: int =
+        max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
         include_return_message_types: Optional[List[MessageType]] = None,
     ) -> LettaResponse:
@@ -170,7 +171,7 @@
         return f"Failed to store memory given start_index {start_index} and end_index {end_index}: {e}", False
 
     async def step_stream(
-        self, input_messages: List[MessageCreate], max_steps: int =
+        self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS, use_assistant_message: bool = True
     ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         """
         This agent is synchronous-only. If called in an async context, raise an error.
|