letta-nightly 0.9.1.dev20250731104458__py3-none-any.whl → 0.10.0.dev20250801010504__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +2 -1
- letta/agent.py +1 -1
- letta/agents/base_agent.py +2 -2
- letta/agents/letta_agent.py +22 -8
- letta/agents/letta_agent_batch.py +2 -2
- letta/agents/voice_agent.py +2 -2
- letta/client/client.py +0 -11
- letta/errors.py +11 -0
- letta/functions/function_sets/builtin.py +3 -7
- letta/functions/mcp_client/types.py +107 -1
- letta/helpers/reasoning_helper.py +48 -0
- letta/helpers/tool_execution_helper.py +2 -65
- letta/interfaces/openai_streaming_interface.py +38 -2
- letta/llm_api/anthropic_client.py +1 -5
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/openai_client.py +2 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +3 -2
- letta/orm/agent.py +5 -0
- letta/orm/enums.py +0 -1
- letta/orm/file.py +0 -1
- letta/orm/files_agents.py +9 -9
- letta/orm/sandbox_config.py +1 -1
- letta/orm/sqlite_functions.py +15 -13
- letta/prompts/system/memgpt_generate_tool.txt +139 -0
- letta/schemas/agent.py +15 -1
- letta/schemas/enums.py +6 -0
- letta/schemas/file.py +3 -3
- letta/schemas/letta_ping.py +28 -0
- letta/schemas/letta_request.py +9 -0
- letta/schemas/letta_stop_reason.py +25 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/mcp.py +16 -3
- letta/schemas/memory.py +5 -0
- letta/schemas/providers/lmstudio.py +7 -0
- letta/schemas/providers/ollama.py +11 -8
- letta/schemas/sandbox_config.py +17 -7
- letta/server/rest_api/app.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +93 -30
- letta/server/rest_api/routers/v1/blocks.py +52 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +2 -1
- letta/server/rest_api/routers/v1/tools.py +43 -101
- letta/server/rest_api/streaming_response.py +121 -9
- letta/server/server.py +6 -10
- letta/services/agent_manager.py +41 -4
- letta/services/block_manager.py +63 -1
- letta/services/file_processor/chunker/line_chunker.py +20 -19
- letta/services/file_processor/file_processor.py +0 -2
- letta/services/file_processor/file_types.py +1 -2
- letta/services/files_agents_manager.py +46 -6
- letta/services/helpers/agent_manager_helper.py +185 -13
- letta/services/job_manager.py +4 -4
- letta/services/mcp/oauth_utils.py +6 -150
- letta/services/mcp_manager.py +120 -2
- letta/services/sandbox_config_manager.py +3 -5
- letta/services/tool_executor/builtin_tool_executor.py +13 -18
- letta/services/tool_executor/files_tool_executor.py +31 -27
- letta/services/tool_executor/mcp_tool_executor.py +10 -1
- letta/services/tool_executor/{tool_executor.py → sandbox_tool_executor.py} +14 -2
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -1
- letta/services/tool_manager.py +59 -21
- letta/services/tool_sandbox/base.py +18 -2
- letta/services/tool_sandbox/e2b_sandbox.py +5 -35
- letta/services/tool_sandbox/local_sandbox.py +5 -22
- letta/services/tool_sandbox/modal_sandbox.py +205 -0
- letta/settings.py +27 -8
- letta/system.py +1 -4
- letta/templates/template_helper.py +5 -0
- letta/utils.py +14 -2
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/METADATA +7 -3
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/RECORD +75 -72
- letta/orm/__all__.py +0 -15
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/LICENSE +0 -0
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/WHEEL +0 -0
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
@@ -5,7 +5,7 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.9.1"
+    __version__ = "0.10.0"
 
 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]
@@ -24,6 +24,7 @@ from letta.schemas.enums import JobStatus
 from letta.schemas.file import FileMetadata
 from letta.schemas.job import Job
 from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_ping import LettaPing
 from letta.schemas.letta_stop_reason import LettaStopReason
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary
letta/agent.py
CHANGED
@@ -1298,7 +1298,7 @@ class Agent(BaseAgent):
         )
 
     async def get_context_window_async(self) -> ContextWindowOverview:
-        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION":
+        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION" and os.getenv("ANTHROPIC_API_KEY"):
             return await self.get_context_window_from_anthropic_async()
         return await self.get_context_window_from_tiktoken_async()
 
letta/agents/base_agent.py
CHANGED
@@ -17,7 +17,7 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
-from letta.services.helpers.agent_manager_helper import …
+from letta.services.helpers.agent_manager_helper import compile_system_message_async
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.utils import united_diff
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
         if num_archival_memories is None:
             num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
 
-        new_system_message_str = …
+        new_system_message_str = await compile_system_message_async(
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
letta/agents/letta_agent.py
CHANGED
@@ -22,6 +22,7 @@ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
+from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -756,6 +757,9 @@ class LettaAgent(BaseAgent):
             interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
+                messages=current_in_context_messages + new_in_context_messages,
+                tools=request_data.get("tools", []),
             )
         else:
             raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
@@ -781,13 +785,20 @@
 
         stream_end_time_ns = get_utc_timestamp_ns()
 
-        # …
+        # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
+        if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
+            logger.warning(
+                f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
+            )
+            interface.input_tokens = interface.fallback_input_tokens
+            interface.output_tokens = interface.fallback_output_tokens
+
         usage.step_count += 1
         usage.completion_tokens += interface.output_tokens
         usage.prompt_tokens += interface.input_tokens
         usage.total_tokens += interface.input_tokens + interface.output_tokens
         MetricRegistry().message_output_tokens.record(
-            …
+            usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
         )
 
         # log LLM request time
@@ -814,9 +825,9 @@
             agent_state,
             tool_rules_solver,
             UsageStatistics(
-                completion_tokens=…
-                prompt_tokens=…
-                total_tokens=…
+                completion_tokens=usage.completion_tokens,
+                prompt_tokens=usage.prompt_tokens,
+                total_tokens=usage.total_tokens,
             ),
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=interface.letta_message_id,
@@ -861,8 +872,8 @@
                 # "stop_sequence": None,
                 "type": "message",
                 "usage": {
-                    "input_tokens": …
-                    "output_tokens": …
+                    "input_tokens": usage.prompt_tokens,
+                    "output_tokens": usage.completion_tokens,
                 },
             },
             step_id=step_id,
@@ -1130,7 +1141,7 @@
         return new_in_context_messages
 
     @trace_method
-    async def summarize_conversation_history(self) -> …
+    async def summarize_conversation_history(self) -> None:
         """Called when the developer explicitly triggers compaction via the API"""
         agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
         message_ids = agent_state.message_ids
@@ -1169,6 +1180,9 @@
             tool_rules_solver=tool_rules_solver,
         )
 
+        # scrub inner thoughts from messages if reasoning is completely disabled
+        in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, agent_state.llm_config)
+
         tools = [
             t
             for t in agent_state.tools
letta/agents/letta_agent_batch.py
CHANGED
@@ -19,7 +19,7 @@ from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
+from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType, SandboxType
 from letta.schemas.job import JobUpdate
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -28,7 +28,7 @@ from letta.schemas.letta_response import LettaBatchResponse, LettaResponse
 from letta.schemas.llm_batch_job import AgentStepState, LLMBatchItem
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall as OpenAIToolCall
-from letta.schemas.sandbox_config import SandboxConfig…
+from letta.schemas.sandbox_config import SandboxConfig
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_heartbeat_system_message, create_letta_messages_from_llm_response
letta/agents/voice_agent.py
CHANGED
@@ -36,7 +36,7 @@ from letta.server.rest_api.utils import (
 )
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import …
+from letta.services.helpers.agent_manager_helper import compile_system_message_async
 from letta.services.job_manager import JobManager
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
@@ -145,7 +145,7 @@ class VoiceAgent(BaseAgent):
 
         in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
         memory_edit_timestamp = get_utc_time()
-        in_context_messages[0].content[0].text = …
+        in_context_messages[0].content[0].text = await compile_system_message_async(
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
letta/client/client.py
CHANGED
@@ -1,4 +1,3 @@
-import sys
 import time
 from typing import Callable, Dict, List, Optional, Union
 
@@ -33,16 +32,6 @@ from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
 from letta.schemas.tool_rule import BaseToolRule
 from letta.utils import get_human_text, get_persona_text
 
-# Print deprecation notice in yellow when module is imported
-print(
-    "\n\n\033[93m"
-    + "DEPRECATION WARNING: This legacy Python client has been deprecated and will be removed in a future release.\n"
-    + "Please migrate to the new official python SDK by running: pip install letta-client\n"
-    + "For further documentation, visit: https://docs.letta.com/api-reference/overview#python-sdk"
-    + "\033[0m\n\n",
-    file=sys.stderr,
-)
-
 
 class AbstractClient(object):
     def __init__(
letta/errors.py
CHANGED
@@ -49,6 +49,17 @@ class LettaToolCreateError(LettaError):
         super().__init__(message=message or self.default_error_message)
 
 
+class LettaToolNameConflictError(LettaError):
+    """Error raised when a tool name already exists."""
+
+    def __init__(self, tool_name: str):
+        super().__init__(
+            message=f"Tool with name '{tool_name}' already exists in your organization",
+            code=ErrorCode.INVALID_ARGUMENT,
+            details={"tool_name": tool_name},
+        )
+
+
 class LettaConfigurationError(LettaError):
     """Error raised when there are configuration-related issues."""
 
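For context, a minimal sketch of how the new error surfaces the conflicting name (this assumes LettaError exposes the `message` and `details` kwargs as attributes, which is not shown in this diff):

    from letta.errors import LettaToolNameConflictError

    try:
        raise LettaToolNameConflictError(tool_name="web_search")
    except LettaToolNameConflictError as e:
        print(e.message)               # "Tool with name 'web_search' already exists in your organization"
        print(e.details["tool_name"])  # "web_search"  (attribute assumed)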
letta/functions/function_sets/builtin.py
CHANGED
@@ -17,11 +17,7 @@ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) ->
     raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
 
 
-async def web_search(
-    tasks: List[SearchTask],
-    limit: int = 3,
-    return_raw: bool = False,
-) -> str:
+async def web_search(tasks: List[SearchTask], limit: int = 1, return_raw: bool = True) -> str:
     """
     Search the web with a list of query/question pairs and extract passages that answer the corresponding questions.
 
@@ -39,9 +35,9 @@ async def web_search(
 
     Args:
         tasks (List[SearchTask]): A list of search tasks, each containing a `query` and a corresponding `question`.
-        limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 3.
+        limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 1.
         return_raw (bool, optional): If set to True, returns the raw content of the web pages.
-            This should be …
+            This should be True unless otherwise specified by the user. Defaults to True.
 
     Returns:
         str: A JSON-encoded string containing a list of search results.
letta/functions/mcp_client/types.py
CHANGED
@@ -1,5 +1,7 @@
+import re
+from abc import abstractmethod
 from enum import Enum
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 from mcp import Tool
 from pydantic import BaseModel, Field
@@ -7,6 +9,9 @@ from pydantic import BaseModel, Field
 # MCP Authentication Constants
 MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
 MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
+TEMPLATED_VARIABLE_REGEX = (
+    r"\{\{\s*([A-Z_][A-Z0-9_]*)\s*(?:\|\s*([^}]+?)\s*)?\}\}"  # Allows for optional whitespace around the variable name and default value
+)
 
 
 class MCPTool(Tool):
@@ -23,6 +28,91 @@ class BaseServerConfig(BaseModel):
     server_name: str = Field(..., description="The name of the server")
     type: MCPServerType
 
+    def is_templated_tool_variable(self, value: str) -> bool:
+        """
+        Check if string contains templated variables.
+
+        Args:
+            value: The value string to check
+
+        Returns:
+            True if the value contains templated variables in the format {{ VARIABLE_NAME }} or {{ VARIABLE_NAME | default }}, False otherwise
+        """
+        return bool(re.search(TEMPLATED_VARIABLE_REGEX, value))
+
+    def get_tool_variable(self, value: str, environment_variables: Dict[str, str]) -> Optional[str]:
+        """
+        Replace templated variables in a value string with their values from environment variables.
+        Supports fallback/default values with pipe syntax.
+
+        Args:
+            value: The value string that may contain templated variables (e.g., "Bearer {{ API_KEY | default_token }}")
+            environment_variables: Dictionary of environment variables
+
+        Returns:
+            The string with templated variables replaced, or None if no templated variables found
+        """
+
+        # If no templated variables found or default value provided, return the original value
+        if not self.is_templated_tool_variable(value):
+            return value
+
+        def replace_template(match):
+            variable_name = match.group(1)
+            default_value = match.group(2) if match.group(2) else None
+
+            # Try to get the value from environment variables
+            env_value = environment_variables.get(variable_name) if environment_variables else None
+
+            # Return environment value if found, otherwise return default value, otherwise return empty string
+            if env_value is not None:
+                return env_value
+            elif default_value is not None:
+                return default_value
+            else:
+                # If no environment value and no default, return the original template
+                return match.group(0)
+
+        # Replace all templated variables in the token
+        result = re.sub(TEMPLATED_VARIABLE_REGEX, replace_template, value)
+
+        # If the result still contains unreplaced templates, just return original value
+        if re.search(TEMPLATED_VARIABLE_REGEX, result):
+            logger.warning(f"Unable to resolve templated variable in value: {value}")
+            return value
+
+        return result
+
+    def resolve_custom_headers(
+        self, custom_headers: Optional[Dict[str, str]], environment_variables: Optional[Dict[str, str]] = None
+    ) -> Optional[Dict[str, str]]:
+        """
+        Resolve templated variables in custom headers dictionary.
+
+        Args:
+            custom_headers: Dictionary of custom headers that may contain templated variables
+            environment_variables: Dictionary of environment variables for resolving templates
+
+        Returns:
+            Dictionary with resolved header values, or None if custom_headers is None
+        """
+        if custom_headers is None:
+            return None
+
+        resolved_headers = {}
+        for key, value in custom_headers.items():
+            # Resolve templated variables in each header value
+            if self.is_templated_tool_variable(value):
+                resolved_headers[key] = self.get_tool_variable(value, environment_variables)
+            else:
+                resolved_headers[key] = value
+
+        return resolved_headers
+
+    @abstractmethod
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        raise NotImplementedError
+
 
 class SSEServerConfig(BaseServerConfig):
     """
@@ -47,6 +137,12 @@ class SSEServerConfig(BaseServerConfig):
             return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
         return self.auth_token
 
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        if self.auth_token and super().is_templated_tool_variable(self.auth_token):
+            self.auth_token = super().get_tool_variable(self.auth_token, environment_variables)
+
+        self.custom_headers = super().resolve_custom_headers(self.custom_headers, environment_variables)
+
     def to_dict(self) -> dict:
         values = {
             "transport": "sse",
@@ -72,6 +168,10 @@ class StdioServerConfig(BaseServerConfig):
     args: List[str] = Field(..., description="The arguments to pass to the command")
     env: Optional[dict[str, str]] = Field(None, description="Environment variables to set")
 
+    # TODO: @jnjpng templated auth handling for stdio
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        pass
+
    def to_dict(self) -> dict:
         values = {
             "transport": "stdio",
@@ -106,6 +206,12 @@ class StreamableHTTPServerConfig(BaseServerConfig):
             return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
         return self.auth_token
 
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        if self.auth_token and super().is_templated_tool_variable(self.auth_token):
+            self.auth_token = super().get_tool_variable(self.auth_token, environment_variables)
+
+        self.custom_headers = super().resolve_custom_headers(self.custom_headers, environment_variables)
+
     def model_post_init(self, __context) -> None:
         """Validate the server URL format."""
         # Basic validation for streamable HTTP URLs
letta/helpers/reasoning_helper.py
ADDED
@@ -0,0 +1,48 @@
+from typing import List
+
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message_content import TextContent
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message
+
+
+def is_reasoning_completely_disabled(llm_config: LLMConfig) -> bool:
+    """
+    Check if reasoning is completely disabled by verifying all three conditions:
+    - put_inner_thoughts_in_kwargs is False
+    - enable_reasoner is False
+    - max_reasoning_tokens is 0
+
+    Args:
+        llm_config: The LLM configuration to check
+
+    Returns:
+        True if reasoning is completely disabled, False otherwise
+    """
+    return llm_config.put_inner_thoughts_in_kwargs is False and llm_config.enable_reasoner is False and llm_config.max_reasoning_tokens == 0
+
+
+def scrub_inner_thoughts_from_messages(messages: List[Message], llm_config: LLMConfig) -> List[Message]:
+    """
+    Remove inner thoughts (reasoning text) from assistant messages when reasoning is completely disabled.
+    This makes the LLM think reasoning was never enabled by presenting clean message history.
+
+    Args:
+        messages: List of messages to potentially scrub
+        llm_config: The LLM configuration to check
+
+    Returns:
+        The message list with inner thoughts removed if reasoning is disabled, otherwise unchanged
+    """
+    # early return if reasoning is not completely disabled
+    if not is_reasoning_completely_disabled(llm_config):
+        return messages
+
+    # process messages to remove inner thoughts from assistant messages
+    for message in messages:
+        if message.role == MessageRole.assistant and message.content and message.tool_calls:
+            # remove text content from assistant messages that also have tool calls
+            # keep only non-text content (if any)
+            message.content = [content for content in message.content if not isinstance(content, TextContent)]
+
+    return messages
letta/helpers/tool_execution_helper.py
CHANGED
@@ -1,17 +1,7 @@
 from collections import OrderedDict
 from typing import Any, Dict, Optional
 
-from letta.constants import …
-from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source
-from letta.functions.composio_helpers import execute_composio_action, generate_composio_action_from_func_name
-from letta.helpers.composio_helpers import get_composio_api_key
-from letta.orm.enums import ToolType
-from letta.schemas.agent import AgentState
-from letta.schemas.sandbox_config import SandboxRunResult
-from letta.schemas.tool import Tool
-from letta.schemas.user import User
-from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
-from letta.utils import get_friendly_error_msg
+from letta.constants import PRE_EXECUTION_MESSAGE_ARG
 
 
 def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
@@ -44,6 +34,7 @@ def add_pre_execution_message(tool_schema: Dict[str, Any], description: Optional
 
     Args:
         tool_schema (Dict[str, Any]): The original tool schema.
+        description (Optional[str]): Description of the tool schema. Defaults to None.
 
     Returns:
         Dict[str, Any]: A new tool schema with the `pre_execution_message` field added at the beginning.
@@ -117,57 +108,3 @@ def remove_request_heartbeat(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
     schema["parameters"] = {**parameters, "properties": properties, "required": required}
 
     return schema
-
-
-# TODO: Deprecate the `execute_external_tool` function on the agent body
-def execute_external_tool(
-    agent_state: AgentState,
-    function_name: str,
-    function_args: dict,
-    target_letta_tool: Tool,
-    actor: User,
-    allow_agent_state_modifications: bool = False,
-) -> tuple[Any, Optional[SandboxRunResult]]:
-    # TODO: need to have an AgentState object that actually has full access to the block data
-    # this is because the sandbox tools need to be able to access block.value to edit this data
-    try:
-        if target_letta_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
-            action_name = generate_composio_action_from_func_name(target_letta_tool.name)
-            # Get entity ID from the agent_state
-            entity_id = None
-            for env_var in agent_state.tool_exec_environment_variables:
-                if env_var.key == COMPOSIO_ENTITY_ENV_VAR_KEY:
-                    entity_id = env_var.value
-            # Get composio_api_key
-            composio_api_key = get_composio_api_key(actor=actor)
-            function_response = execute_composio_action(
-                action_name=action_name, args=function_args, api_key=composio_api_key, entity_id=entity_id
-            )
-            return function_response, None
-        elif target_letta_tool.tool_type == ToolType.CUSTOM:
-            # Parse the source code to extract function annotations
-            annotations = get_function_annotations_from_source(target_letta_tool.source_code, function_name)
-            # Coerce the function arguments to the correct types based on the annotations
-            function_args = coerce_dict_args_by_annotations(function_args, annotations)
-
-            # execute tool in a sandbox
-            # TODO: allow agent_state to specify which sandbox to execute tools in
-            # TODO: This is only temporary, can remove after we publish a pip package with this object
-            if allow_agent_state_modifications:
-                agent_state_copy = agent_state.__deepcopy__()
-                agent_state_copy.tools = []
-                agent_state_copy.tool_rules = []
-            else:
-                agent_state_copy = None
-
-            tool_execution_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
-            function_response, updated_agent_state = tool_execution_result.func_return, tool_execution_result.agent_state
-            # TODO: Bring this back
-            # if allow_agent_state_modifications and updated_agent_state is not None:
-            #     self.update_memory_if_changed(updated_agent_state.memory)
-            return function_response, tool_execution_result
-    except Exception as e:
-        # Need to catch error here, or else trunction wont happen
-        # TODO: modify to function execution error
-        function_response = get_friendly_error_msg(function_name=function_name, exception_name=type(e).__name__, exception_message=str(e))
-        return function_response, None
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -9,6 +9,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.llm_api.openai_client import is_openai_reasoning_model
+from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
@@ -19,6 +20,7 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
 from letta.streaming_utils import JSONInnerThoughtsExtractor
+from letta.utils import count_tokens
 
 logger = get_logger(__name__)
 
@@ -30,7 +32,14 @@ class OpenAIStreamingInterface:
     and detection of tool call events.
     """
 
-    def __init__(…
+    def __init__(
+        self,
+        use_assistant_message: bool = False,
+        put_inner_thoughts_in_kwarg: bool = False,
+        is_openai_proxy: bool = False,
+        messages: Optional[list] = None,
+        tools: Optional[list] = None,
+    ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
         self.assistant_message_tool_kwarg = DEFAULT_MESSAGE_TOOL_KWARG
@@ -53,10 +62,19 @@ class OpenAIStreamingInterface:
         self.message_id = None
         self.model = None
 
-        # …
+        # Token counters (from OpenAI usage)
         self.input_tokens = 0
         self.output_tokens = 0
 
+        # Fallback token counters (using tiktoken cl200k-base)
+        self.fallback_input_tokens = 0
+        self.fallback_output_tokens = 0
+
+        # Store messages and tools for fallback counting
+        self.is_openai_proxy = is_openai_proxy
+        self.messages = messages or []
+        self.tools = tools or []
+
         self.content_buffer: list[str] = []
         self.tool_call_name: str | None = None
         self.tool_call_id: str | None = None
@@ -95,6 +113,18 @@ class OpenAIStreamingInterface:
         Iterates over the OpenAI stream, yielding SSE events.
         It also collects tokens and detects if a tool call is triggered.
         """
+        # Fallback input token counting - this should only be required for non-OpenAI providers using the OpenAI client (e.g. LMStudio)
+        if self.is_openai_proxy:
+            if self.messages:
+                # Convert messages to dict format for token counting
+                message_dicts = [msg.to_openai_dict() if hasattr(msg, "to_openai_dict") else msg for msg in self.messages]
+                self.fallback_input_tokens = num_tokens_from_messages(message_dicts)  # fallback to gpt-4 cl100k-base
+
+            if self.tools:
+                # Convert tools to dict format for token counting
+                tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
+                self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
+
         first_chunk = True
         try:
             async with stream:
@@ -113,6 +143,9 @@
                         metric_attributes["model.name"] = chunk.model
                         MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
 
+                    if self.is_openai_proxy:
+                        self.fallback_output_tokens += count_tokens(chunk.model_dump_json())
+
                     first_chunk = False
 
                     if not self.model or not self.message_id:
@@ -153,6 +186,9 @@
                                 tool_call.function.arguments
                             )
 
+                            if self.is_openai_proxy:
+                                self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+
                             # If we have inner thoughts, we should output them as a chunk
                             if updates_inner_thoughts:
                                 if prev_message_type and prev_message_type != "reasoning_message":
letta/llm_api/anthropic_client.py
CHANGED
@@ -215,11 +215,7 @@ class AnthropicClient(LLMClientBase):
             )
             llm_config.put_inner_thoughts_in_kwargs = True
         else:
-
-            # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
-            tool_choice = {"type": "any", "disable_parallel_tool_use": True}
-        else:
-            tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
+            tool_choice = {"type": "any", "disable_parallel_tool_use": True}
         tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
 
         # Add tool choice
letta/llm_api/google_vertex_client.py
CHANGED
@@ -102,7 +102,7 @@ class GoogleVertexClient(LLMClientBase):
         unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties", "$schema"]
         keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
         for key_to_remove in keys_to_remove_at_this_level:
-            logger.…
+            logger.debug(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
             del schema_part[key_to_remove]
 
         if schema_part.get("type") == "string" and "format" in schema_part:
letta/llm_api/llm_client.py
CHANGED
@@ -58,7 +58,7 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
-            case ProviderType.openai | ProviderType.together:
+            case ProviderType.openai | ProviderType.together | ProviderType.ollama:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
letta/llm_api/openai_client.py
CHANGED