letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +899 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +126 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +123 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +1 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +504 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +86 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +126 -85
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +85 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +301 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_adapter.py
CHANGED
@@ -28,6 +28,7 @@ class LettaLLMAdapter(ABC):
         self.response_data: dict | None = None
         self.chat_completions_response: ChatCompletionResponse | None = None
         self.reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
+        self.content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
         self.tool_call: ToolCall | None = None
         self.usage: LettaUsageStatistics = LettaUsageStatistics()
         self.telemetry_manager: TelemetryManager = TelemetryManager()
letta/adapters/letta_llm_request_adapter.py
CHANGED
@@ -1,4 +1,3 @@
-import asyncio
 from typing import AsyncGenerator
 
 from letta.adapters.letta_llm_adapter import LettaLLMAdapter
letta/adapters/letta_llm_stream_adapter.py
CHANGED
@@ -26,8 +25,9 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
     specific streaming formats.
     """
 
-    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig) -> None:
+    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig, run_id: str | None = None) -> None:
         super().__init__(llm_client, llm_config)
+        self.run_id = run_id
         self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
 
     async def invoke_llm(
@@ -58,8 +58,11 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs,
                 requires_approval_tools=requires_approval_tools,
+                run_id=self.run_id,
+                step_id=step_id,
             )
         elif self.llm_config.model_endpoint_type == ProviderType.openai:
+            # For non-v1 agents, always use Chat Completions streaming interface
             self.interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 is_openai_proxy=self.llm_config.provider_name == "lmstudio_openai",
@@ -67,6 +70,8 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
                 messages=messages,
                 tools=tools,
                 requires_approval_tools=requires_approval_tools,
+                run_id=self.run_id,
+                step_id=step_id,
             )
         else:
             raise ValueError(f"Streaming not supported for provider {self.llm_config.model_endpoint_type}")
letta/adapters/simple_llm_request_adapter.py
ADDED
@@ -0,0 +1,88 @@
+from typing import AsyncGenerator
+
+from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
+
+
+class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
+    """Simplifying assumptions:
+
+    - No inner thoughts in kwargs
+    - No forced tool calls
+    - Content native as assistant message
+    """
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: str | None = None,
+    ) -> AsyncGenerator[LettaMessage | None, None]:
+        """
+        Execute a blocking LLM request and yield the response.
+
+        This adapter:
+        1. Makes a blocking request to the LLM
+        2. Converts the response to chat completion format
+        3. Extracts reasoning and tool call information
+        4. Updates all instance variables
+        5. Yields nothing (blocking mode doesn't stream)
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Make the blocking LLM request
+        self.response_data = await self.llm_client.request_async(request_data, self.llm_config)
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Convert response to chat completion format
+        self.chat_completions_response = self.llm_client.convert_response_to_chat_completion(self.response_data, messages, self.llm_config)
+
+        # Extract reasoning content from the response
+        if self.chat_completions_response.choices[0].message.reasoning_content:
+            self.reasoning_content = [
+                ReasoningContent(
+                    reasoning=self.chat_completions_response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=self.chat_completions_response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        elif self.chat_completions_response.choices[0].message.omitted_reasoning_content:
+            self.reasoning_content = [OmittedReasoningContent()]
+        else:
+            # logger.info("No reasoning content found.")
+            self.reasoning_content = None
+
+        if self.chat_completions_response.choices[0].message.content:
+            # NOTE: big difference - 'content' goes into 'content'
+            # Reasoning placed into content for legacy reasons
+            self.content = [TextContent(text=self.chat_completions_response.choices[0].message.content)]
+        else:
+            self.content = None
+
+        if self.reasoning_content and len(self.reasoning_content) > 0:
+            # Temp workaround to consolidate parts to persist reasoning content, this should be integrated better
+            self.content = self.reasoning_content + (self.content or [])
+
+        # Extract tool call
+        if self.chat_completions_response.choices[0].message.tool_calls:
+            self.tool_call = self.chat_completions_response.choices[0].message.tool_calls[0]
+        else:
+            self.tool_call = None
+
+        # Extract usage statistics
+        self.usage.step_count = 1
+        self.usage.completion_tokens = self.chat_completions_response.usage.completion_tokens
+        self.usage.prompt_tokens = self.chat_completions_response.usage.prompt_tokens
+        self.usage.total_tokens = self.chat_completions_response.usage.total_tokens
+
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+        yield None
+        return
letta/adapters/simple_llm_stream_adapter.py
ADDED
@@ -0,0 +1,192 @@
+from typing import AsyncGenerator, List
+
+from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.interfaces.anthropic_streaming_interface import SimpleAnthropicStreamingInterface
+from letta.interfaces.gemini_streaming_interface import SimpleGeminiStreamingInterface
+from letta.interfaces.openai_streaming_interface import SimpleOpenAIResponsesStreamingInterface, SimpleOpenAIStreamingInterface
+from letta.schemas.enums import ProviderType
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import LettaMessageContentUnion
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.settings import settings
+from letta.utils import safe_create_task
+
+
+class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
+    """
+    Adapter for handling streaming LLM requests with immediate token yielding.
+
+    This adapter supports real-time streaming of tokens from the LLM, providing
+    minimal time-to-first-token (TTFT) latency. It uses specialized streaming
+    interfaces for different providers (OpenAI, Anthropic) to handle their
+    specific streaming formats.
+    """
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,  # NOTE: not used
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: User | None = None,
+    ) -> AsyncGenerator[LettaMessage, None]:
+        """
+        Execute a streaming LLM request and yield tokens/chunks as they arrive.
+
+        This adapter:
+        1. Makes a streaming request to the LLM
+        2. Yields chunks immediately for minimal TTFT
+        3. Accumulates response data through the streaming interface
+        4. Updates all instance variables after streaming completes
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Instantiate streaming interface
+        if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+            # NOTE: different
+            self.interface = SimpleAnthropicStreamingInterface(
+                requires_approval_tools=requires_approval_tools,
+                run_id=self.run_id,
+                step_id=step_id,
+            )
+        elif self.llm_config.model_endpoint_type == ProviderType.openai:
+            # Decide interface based on payload shape
+            use_responses = "input" in request_data and "messages" not in request_data
+            # No support for Responses API proxy
+            is_proxy = self.llm_config.provider_name == "lmstudio_openai"
+            if use_responses and not is_proxy:
+                self.interface = SimpleOpenAIResponsesStreamingInterface(
+                    is_openai_proxy=False,
+                    messages=messages,
+                    tools=tools,
+                    requires_approval_tools=requires_approval_tools,
+                    run_id=self.run_id,
+                    step_id=step_id,
+                )
+            else:
+                self.interface = SimpleOpenAIStreamingInterface(
+                    is_openai_proxy=self.llm_config.provider_name == "lmstudio_openai",
+                    messages=messages,
+                    tools=tools,
+                    requires_approval_tools=requires_approval_tools,
+                    model=self.llm_config.model,
+                    run_id=self.run_id,
+                    step_id=step_id,
+                )
+        elif self.llm_config.model_endpoint_type in [ProviderType.google_ai, ProviderType.google_vertex]:
+            self.interface = SimpleGeminiStreamingInterface(
+                requires_approval_tools=requires_approval_tools,
+                run_id=self.run_id,
+                step_id=step_id,
+            )
+        else:
+            raise ValueError(f"Streaming not supported for provider {self.llm_config.model_endpoint_type}")
+
+        # Extract optional parameters
+        # ttft_span = kwargs.get('ttft_span', None)
+
+        # Start the streaming request
+        stream = await self.llm_client.stream_async(request_data, self.llm_config)
+
+        # Process the stream and yield chunks immediately for TTFT
+        async for chunk in self.interface.process(stream):  # TODO: add ttft span
+            # Yield each chunk immediately as it arrives
+            yield chunk
+
+        # After streaming completes, extract the accumulated data
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Extract tool call from the interface
+        try:
+            self.tool_call = self.interface.get_tool_call_object()
+        except ValueError as e:
+            # No tool call, handle upstream
+            self.tool_call = None
+
+        # Extract reasoning content from the interface
+        # TODO this should probably just be called "content"?
+        # self.reasoning_content = self.interface.get_reasoning_content()
+
+        # Extract all content parts
+        self.content: List[LettaMessageContentUnion] = self.interface.get_content()
+
+        # Extract usage statistics
+        # Some providers don't provide usage in streaming, use fallback if needed
+        if hasattr(self.interface, "input_tokens") and hasattr(self.interface, "output_tokens"):
+            # Handle cases where tokens might not be set (e.g., LMStudio)
+            input_tokens = self.interface.input_tokens
+            output_tokens = self.interface.output_tokens
+
+            # Fallback to estimated values if not provided
+            if not input_tokens and hasattr(self.interface, "fallback_input_tokens"):
+                input_tokens = self.interface.fallback_input_tokens
+            if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
+                output_tokens = self.interface.fallback_output_tokens
+
+            self.usage = LettaUsageStatistics(
+                step_count=1,
+                completion_tokens=output_tokens or 0,
+                prompt_tokens=input_tokens or 0,
+                total_tokens=(input_tokens or 0) + (output_tokens or 0),
+            )
+        else:
+            # Default usage statistics if not available
+            self.usage = LettaUsageStatistics(step_count=1, completion_tokens=0, prompt_tokens=0, total_tokens=0)
+
+        # Store any additional data from the interface
+        self.message_id = self.interface.letta_message_id
+
+        # Log request and response data
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes in a fire-and-forget manner.
+
+        Creates an async task to log the request/response data without blocking
+        the main execution flow. For streaming adapters, this includes the final
+        tool call and reasoning content collected during streaming.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        if step_id is None or actor is None or not settings.track_provider_trace:
+            return
+
+        safe_create_task(
+            self.telemetry_manager.create_provider_trace_async(
+                actor=actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=self.request_data,
+                    response_json={
+                        "content": {
+                            "tool_call": self.tool_call.model_dump_json() if self.tool_call else None,
+                            # "reasoning": [content.model_dump_json() for content in self.reasoning_content],
+                            # NOTE: different
+                            # TODO potentially split this into both content and reasoning?
+                            "content": [content.model_dump_json() for content in self.content],
+                        },
+                        "id": self.interface.message_id,
+                        "model": self.interface.model,
+                        "role": "assistant",
+                        # "stop_reason": "",
+                        # "stop_sequence": None,
+                        "type": "message",
+                        "usage": {
+                            "input_tokens": self.usage.prompt_tokens,
+                            "output_tokens": self.usage.completion_tokens,
+                        },
+                    },
+                    step_id=step_id,  # Use original step_id for telemetry
+                    organization_id=actor.organization_id,
+                ),
+            ),
+            label="create_provider_trace",
+        )
letta/agents/agent_loop.py
CHANGED
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING
 
 from letta.agents.base_agent_v2 import BaseAgentV2
 from letta.agents.letta_agent_v2 import LettaAgentV2
+from letta.agents.letta_agent_v3 import LettaAgentV3
 from letta.groups.sleeptime_multi_agent_v3 import SleeptimeMultiAgentV3
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import AgentType
@@ -17,6 +18,11 @@ class AgentLoop:
     def load(agent_state: AgentState, actor: "User") -> BaseAgentV2:
         if agent_state.enable_sleeptime and agent_state.agent_type != AgentType.voice_convo_agent:
             return SleeptimeMultiAgentV3(agent_state=agent_state, actor=actor, group=agent_state.multi_agent_group)
+        elif agent_state.agent_type == AgentType.letta_v1_agent:
+            return LettaAgentV3(
+                agent_state=agent_state,
+                actor=actor,
+            )
         else:
             return LettaAgentV2(
                 agent_state=agent_state,
letta/agents/ephemeral_summary_agent.py
CHANGED
@@ -82,9 +82,10 @@ class EphemeralSummaryAgent(BaseAgent):
             message_creates=[system_message_create] + input_messages,
             agent_id=self.agent_id,
             timezone=agent_state.timezone,
+            run_id=None,  # TODO: add this
         )
 
-        request_data = llm_client.build_request_data(messages, agent_state.llm_config, tools=[])
+        request_data = llm_client.build_request_data(agent_state.agent_type, messages, agent_state.llm_config, tools=[])
         response_data = await llm_client.request_async(request_data, agent_state.llm_config)
         response = llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
         summary = response.choices[0].message.content.strip()
letta/agents/helpers.py
CHANGED
@@ -1,13 +1,16 @@
 import json
 import uuid
 import xml.etree.ElementTree as ET
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
+from uuid import UUID, uuid4
 
 from letta.errors import PendingApprovalError
 from letta.helpers import ToolRulesSolver
 from letta.log import get_logger
 from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import MessageType
+from letta.schemas.letta_message_content import TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message, MessageCreate, MessageCreateBase
@@ -53,6 +56,7 @@ def _prepare_in_context_messages(
     agent_state: AgentState,
     message_manager: MessageManager,
     actor: User,
+    run_id: str,
 ) -> Tuple[List[Message], List[Message]]:
     """
     Prepares in-context messages for an agent, based on the current state and a new user input.
@@ -62,6 +66,7 @@ def _prepare_in_context_messages(
         agent_state (AgentState): The current state of the agent, including message buffer config.
         message_manager (MessageManager): The manager used to retrieve and create messages.
         actor (User): The user performing the action, used for access control and attribution.
+        run_id (str): The run ID associated with this message processing.
 
     Returns:
         Tuple[List[Message], List[Message]]: A tuple containing:
@@ -78,7 +83,9 @@ def _prepare_in_context_messages(
 
     # Create a new user message from the input and store it
     new_in_context_messages = message_manager.create_many_messages(
-        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
+        create_input_messages(
+            input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, run_id=run_id, actor=actor
+        ),
         actor=actor,
     )
 
@@ -90,6 +97,7 @@ async def _prepare_in_context_messages_async(
     agent_state: AgentState,
     message_manager: MessageManager,
    actor: User,
+    run_id: str,
 ) -> Tuple[List[Message], List[Message]]:
     """
     Prepares in-context messages for an agent, based on the current state and a new user input.
@@ -100,6 +108,7 @@ async def _prepare_in_context_messages_async(
         agent_state (AgentState): The current state of the agent, including message buffer config.
         message_manager (MessageManager): The manager used to retrieve and create messages.
         actor (User): The user performing the action, used for access control and attribution.
+        run_id (str): The run ID associated with this message processing.
 
     Returns:
         Tuple[List[Message], List[Message]]: A tuple containing:
@@ -116,7 +125,9 @@ async def _prepare_in_context_messages_async(
 
     # Create a new user message from the input and store it
     new_in_context_messages = await message_manager.create_many_messages_async(
-        create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
+        create_input_messages(
+            input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, run_id=run_id, actor=actor
+        ),
         actor=actor,
         project_id=agent_state.project_id,
     )
@@ -129,6 +140,7 @@ async def _prepare_in_context_messages_no_persist_async(
     agent_state: AgentState,
     message_manager: MessageManager,
     actor: User,
+    run_id: Optional[str] = None,
 ) -> Tuple[List[Message], List[Message]]:
     """
     Prepares in-context messages for an agent, based on the current state and a new user input.
@@ -138,6 +150,7 @@ async def _prepare_in_context_messages_no_persist_async(
         agent_state (AgentState): The current state of the agent, including message buffer config.
         message_manager (MessageManager): The manager used to retrieve and create messages.
         actor (User): The user performing the action, used for access control and attribution.
+        run_id (str): The run ID associated with this message processing.
 
     Returns:
         Tuple[List[Message], List[Message]]: A tuple containing:
@@ -173,7 +186,7 @@ async def _prepare_in_context_messages_no_persist_async(
 
     # Create a new user message from the input but dont store it yet
     new_in_context_messages = create_input_messages(
-        input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor
+        input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, run_id=run_id, actor=actor
     )
 
     return current_in_context_messages, new_in_context_messages
@@ -232,8 +245,9 @@ def deserialize_message_history(xml_str: str) -> Tuple[List[str], str]:
     return messages, context
 
 
-def generate_step_id():
-    return f"step-{uuid.uuid4()}"
+def generate_step_id(uid: Optional[UUID] = None) -> str:
+    uid = uid or uuid4()
+    return f"step-{uid}"
 
 
 def _safe_load_tool_call_str(tool_call_args_str: str) -> dict:
@@ -254,6 +268,106 @@ def _safe_load_tool_call_str(tool_call_args_str: str) -> dict:
     return tool_args
 
 
+def _json_type_matches(value: Any, expected_type: Any) -> bool:
+    """Basic JSON Schema type checking for common types.
+
+    expected_type can be a string (e.g., "string") or a list (union).
+    This is intentionally lightweight; deeper validation can be added as needed.
+    """
+
+    def match_one(v: Any, t: str) -> bool:
+        if t == "string":
+            return isinstance(v, str)
+        if t == "integer":
+            # bool is subclass of int in Python; exclude
+            return isinstance(v, int) and not isinstance(v, bool)
+        if t == "number":
+            return (isinstance(v, int) and not isinstance(v, bool)) or isinstance(v, float)
+        if t == "boolean":
+            return isinstance(v, bool)
+        if t == "object":
+            return isinstance(v, dict)
+        if t == "array":
+            return isinstance(v, list)
+        if t == "null":
+            return v is None
+        # Fallback: don't over-reject on unknown types
+        return True
+
+    if isinstance(expected_type, list):
+        return any(match_one(value, t) for t in expected_type)
+    if isinstance(expected_type, str):
+        return match_one(value, expected_type)
+    return True
+
+
+def _schema_accepts_value(prop_schema: Dict[str, Any], value: Any) -> bool:
+    """Check if a value is acceptable for a property schema.
+
+    Handles: type, enum, const, anyOf, oneOf (by shallow traversal).
+    """
+    if prop_schema is None:
+        return True
+
+    # const has highest precedence
+    if "const" in prop_schema:
+        return value == prop_schema["const"]
+
+    # enums
+    if "enum" in prop_schema:
+        try:
+            return value in prop_schema["enum"]
+        except Exception:
+            return False
+
+    # unions
+    for union_key in ("anyOf", "oneOf"):
+        if union_key in prop_schema and isinstance(prop_schema[union_key], list):
+            for sub in prop_schema[union_key]:
+                if _schema_accepts_value(sub, value):
+                    return True
+            return False
+
+    # type-based
+    if "type" in prop_schema:
+        if not _json_type_matches(value, prop_schema["type"]):
+            return False
+
+    # No strict constraints specified: accept
+    return True
+
+
+def merge_and_validate_prefilled_args(tool: "Tool", llm_args: Dict[str, Any], prefilled_args: Dict[str, Any]) -> Dict[str, Any]:
+    """Merge LLM-provided args with prefilled args from tool rules.
+
+    - Overlapping keys are replaced by prefilled values (prefilled wins).
+    - Validates that prefilled keys exist on the tool schema and that values satisfy
+      basic JSON Schema constraints (type/enum/const/anyOf/oneOf).
+    - Returns merged args, or raises ValueError on invalid prefilled inputs.
+    """
+    from letta.schemas.tool import Tool  # local import to avoid circulars in type hints
+
+    assert isinstance(tool, Tool)
+    schema = (tool.json_schema or {}).get("parameters", {})
+    props: Dict[str, Any] = schema.get("properties", {}) if isinstance(schema, dict) else {}
+
+    errors: list[str] = []
+    for k, v in prefilled_args.items():
+        if k not in props:
+            errors.append(f"Unknown argument '{k}' for tool '{tool.name}'.")
+            continue
+        if not _schema_accepts_value(props.get(k), v):
+            expected = props.get(k, {}).get("type")
+            errors.append(f"Invalid value for '{k}': {v!r} does not match expected schema type {expected!r}.")
+
+    if errors:
+        raise ValueError("; ".join(errors))
+
+    merged = dict(llm_args or {})
+    merged.update(prefilled_args)
+    return merged
+
+
 def _pop_heartbeat(tool_args: dict) -> bool:
     hb = tool_args.pop("request_heartbeat", False)
     return str(hb).lower() == "true" if isinstance(hb, str) else bool(hb)
@@ -264,3 +378,25 @@ def _build_rule_violation_result(tool_name: str, valid: list[str], solver: ToolR
     hint_txt = ("\n** Hint: Possible rules that were violated:\n" + "\n".join(f"\t- {h}" for h in hint_lines)) if hint_lines else ""
     msg = f"[ToolConstraintError] Cannot call {tool_name}, valid tools include: {valid}.{hint_txt}"
     return ToolExecutionResult(status="error", func_return=msg)
+
+
+def _load_last_function_response(in_context_messages: list[Message]):
+    """Load the last function response from message history"""
+    for msg in reversed(in_context_messages):
+        if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
+            text_content = msg.content[0].text
+            try:
+                response_json = json.loads(text_content)
+                if response_json.get("message"):
+                    return response_json["message"]
+            except (json.JSONDecodeError, KeyError):
+                raise ValueError(f"Invalid JSON format in message: {text_content}")
+    return None
+
+
+def _maybe_get_approval_messages(messages: list[Message]) -> Tuple[Message | None, Message | None]:
+    if len(messages) >= 2:
+        maybe_approval_request, maybe_approval_response = messages[-2], messages[-1]
+        if maybe_approval_request.role == "approval" and maybe_approval_response.role == "approval":
+            return maybe_approval_request, maybe_approval_response
+    return None, None