letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/agent.py +47 -12
- letta/agents/base_agent.py +7 -4
- letta/agents/helpers.py +52 -0
- letta/agents/letta_agent.py +105 -42
- letta/agents/voice_agent.py +2 -2
- letta/constants.py +13 -1
- letta/errors.py +10 -3
- letta/functions/function_sets/base.py +65 -0
- letta/functions/interface.py +2 -2
- letta/functions/mcp_client/base_client.py +18 -1
- letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
- letta/groups/helpers.py +113 -0
- letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
- letta/groups/sleeptime_multi_agent.py +259 -0
- letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
- letta/helpers/converters.py +109 -7
- letta/helpers/message_helper.py +1 -0
- letta/helpers/tool_rule_solver.py +40 -23
- letta/interface.py +12 -5
- letta/interfaces/anthropic_streaming_interface.py +329 -0
- letta/llm_api/anthropic.py +12 -1
- letta/llm_api/anthropic_client.py +65 -14
- letta/llm_api/azure_openai.py +2 -2
- letta/llm_api/google_ai_client.py +13 -2
- letta/llm_api/google_constants.py +3 -0
- letta/llm_api/google_vertex_client.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client.py +7 -0
- letta/llm_api/llm_client_base.py +2 -7
- letta/llm_api/openai.py +7 -1
- letta/llm_api/openai_client.py +250 -0
- letta/orm/__init__.py +4 -0
- letta/orm/agent.py +6 -0
- letta/orm/block.py +32 -2
- letta/orm/block_history.py +46 -0
- letta/orm/custom_columns.py +60 -0
- letta/orm/enums.py +7 -0
- letta/orm/group.py +6 -0
- letta/orm/groups_blocks.py +13 -0
- letta/orm/llm_batch_items.py +55 -0
- letta/orm/llm_batch_job.py +48 -0
- letta/orm/message.py +7 -1
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +18 -15
- letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
- letta/prompts/system/sleeptime.txt +26 -0
- letta/schemas/agent.py +13 -1
- letta/schemas/enums.py +17 -2
- letta/schemas/group.py +14 -1
- letta/schemas/letta_message.py +5 -3
- letta/schemas/llm_batch_job.py +53 -0
- letta/schemas/llm_config.py +14 -4
- letta/schemas/message.py +44 -0
- letta/schemas/tool.py +3 -0
- letta/schemas/usage.py +1 -0
- letta/server/db.py +2 -0
- letta/server/rest_api/app.py +1 -1
- letta/server/rest_api/chat_completions_interface.py +8 -3
- letta/server/rest_api/interface.py +36 -7
- letta/server/rest_api/routers/v1/agents.py +53 -39
- letta/server/rest_api/routers/v1/runs.py +14 -2
- letta/server/rest_api/utils.py +15 -4
- letta/server/server.py +120 -71
- letta/services/agent_manager.py +70 -6
- letta/services/block_manager.py +190 -2
- letta/services/group_manager.py +68 -0
- letta/services/helpers/agent_manager_helper.py +6 -4
- letta/services/llm_batch_manager.py +139 -0
- letta/services/message_manager.py +17 -31
- letta/services/tool_executor/tool_execution_sandbox.py +1 -3
- letta/services/tool_executor/tool_executor.py +9 -20
- letta/services/tool_manager.py +14 -3
- letta/services/tool_sandbox/__init__.py +0 -0
- letta/services/tool_sandbox/base.py +188 -0
- letta/services/tool_sandbox/e2b_sandbox.py +116 -0
- letta/services/tool_sandbox/local_sandbox.py +221 -0
- letta/sleeptime_agent.py +61 -0
- letta/streaming_interface.py +20 -10
- letta/utils.py +4 -0
- {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
- {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
- letta/offline_memory_agent.py +0 -173
- letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
- {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
|
@@ -38,29 +38,46 @@ class ToolRulesSolver(BaseModel):
|
|
|
38
38
|
)
|
|
39
39
|
tool_call_history: List[str] = Field(default_factory=list, description="History of tool calls, updated with each tool call.")
|
|
40
40
|
|
|
41
|
-
def __init__(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
tool_rules: Optional[List[BaseToolRule]] = None,
|
|
44
|
+
init_tool_rules: Optional[List[InitToolRule]] = None,
|
|
45
|
+
continue_tool_rules: Optional[List[ContinueToolRule]] = None,
|
|
46
|
+
child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
|
|
47
|
+
terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
|
|
48
|
+
tool_call_history: Optional[List[str]] = None,
|
|
49
|
+
**kwargs,
|
|
50
|
+
):
|
|
51
|
+
super().__init__(
|
|
52
|
+
init_tool_rules=init_tool_rules or [],
|
|
53
|
+
continue_tool_rules=continue_tool_rules or [],
|
|
54
|
+
child_based_tool_rules=child_based_tool_rules or [],
|
|
55
|
+
terminal_tool_rules=terminal_tool_rules or [],
|
|
56
|
+
tool_call_history=tool_call_history or [],
|
|
57
|
+
**kwargs,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
if tool_rules:
|
|
61
|
+
for rule in tool_rules:
|
|
62
|
+
if rule.type == ToolRuleType.run_first:
|
|
63
|
+
assert isinstance(rule, InitToolRule)
|
|
64
|
+
self.init_tool_rules.append(rule)
|
|
65
|
+
elif rule.type == ToolRuleType.constrain_child_tools:
|
|
66
|
+
assert isinstance(rule, ChildToolRule)
|
|
67
|
+
self.child_based_tool_rules.append(rule)
|
|
68
|
+
elif rule.type == ToolRuleType.conditional:
|
|
69
|
+
assert isinstance(rule, ConditionalToolRule)
|
|
70
|
+
self.validate_conditional_tool(rule)
|
|
71
|
+
self.child_based_tool_rules.append(rule)
|
|
72
|
+
elif rule.type == ToolRuleType.exit_loop:
|
|
73
|
+
assert isinstance(rule, TerminalToolRule)
|
|
74
|
+
self.terminal_tool_rules.append(rule)
|
|
75
|
+
elif rule.type == ToolRuleType.continue_loop:
|
|
76
|
+
assert isinstance(rule, ContinueToolRule)
|
|
77
|
+
self.continue_tool_rules.append(rule)
|
|
78
|
+
elif rule.type == ToolRuleType.max_count_per_step:
|
|
79
|
+
assert isinstance(rule, MaxCountPerStepToolRule)
|
|
80
|
+
self.child_based_tool_rules.append(rule)
|
|
64
81
|
|
|
65
82
|
def register_tool_call(self, tool_name: str):
|
|
66
83
|
"""Update the internal state to track tool call history."""
|
letta/interface.py
CHANGED
|
@@ -30,7 +30,7 @@ class AgentInterface(ABC):
|
|
|
30
30
|
raise NotImplementedError
|
|
31
31
|
|
|
32
32
|
@abstractmethod
|
|
33
|
-
def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
|
|
33
|
+
def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
|
|
34
34
|
"""Letta generates some internal monologue"""
|
|
35
35
|
raise NotImplementedError
|
|
36
36
|
|
|
@@ -40,7 +40,7 @@ class AgentInterface(ABC):
|
|
|
40
40
|
raise NotImplementedError
|
|
41
41
|
|
|
42
42
|
@abstractmethod
|
|
43
|
-
def function_message(self, msg: str, msg_obj: Optional[Message] = None):
|
|
43
|
+
def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
|
|
44
44
|
"""Letta calls a function"""
|
|
45
45
|
raise NotImplementedError
|
|
46
46
|
|
|
@@ -79,7 +79,7 @@ class CLIInterface(AgentInterface):
|
|
|
79
79
|
print(fstr.format(msg=msg))
|
|
80
80
|
|
|
81
81
|
@staticmethod
|
|
82
|
-
def internal_monologue(msg: str, msg_obj: Optional[Message] = None):
|
|
82
|
+
def internal_monologue(msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
|
|
83
83
|
# ANSI escape code for italic is '\x1B[3m'
|
|
84
84
|
fstr = f"\x1B[3m{Fore.LIGHTBLACK_EX}{INNER_THOUGHTS_CLI_SYMBOL} {{msg}}{Style.RESET_ALL}"
|
|
85
85
|
if STRIP_UI:
|
|
@@ -108,7 +108,14 @@ class CLIInterface(AgentInterface):
|
|
|
108
108
|
print(fstr.format(msg=msg))
|
|
109
109
|
|
|
110
110
|
@staticmethod
|
|
111
|
-
def user_message(
|
|
111
|
+
def user_message(
|
|
112
|
+
msg: str,
|
|
113
|
+
msg_obj: Optional[Message] = None,
|
|
114
|
+
raw: bool = False,
|
|
115
|
+
dump: bool = False,
|
|
116
|
+
debug: bool = DEBUG,
|
|
117
|
+
chunk_index: Optional[int] = None,
|
|
118
|
+
):
|
|
112
119
|
def print_user_message(icon, msg, printf=print):
|
|
113
120
|
if STRIP_UI:
|
|
114
121
|
printf(f"{icon} {msg}")
|
|
@@ -154,7 +161,7 @@ class CLIInterface(AgentInterface):
|
|
|
154
161
|
printd_user_message("🧑", msg_json)
|
|
155
162
|
|
|
156
163
|
@staticmethod
|
|
157
|
-
def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG):
|
|
164
|
+
def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG, chunk_index: Optional[int] = None):
|
|
158
165
|
def print_function_message(icon, msg, color=Fore.RED, printf=print):
|
|
159
166
|
if STRIP_UI:
|
|
160
167
|
printf(f"⚡{icon} [function] {msg}")
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import AsyncGenerator, List, Union
|
|
4
|
+
|
|
5
|
+
from anthropic import AsyncStream
|
|
6
|
+
from anthropic.types.beta import (
|
|
7
|
+
BetaInputJSONDelta,
|
|
8
|
+
BetaRawContentBlockDeltaEvent,
|
|
9
|
+
BetaRawContentBlockStartEvent,
|
|
10
|
+
BetaRawContentBlockStopEvent,
|
|
11
|
+
BetaRawMessageDeltaEvent,
|
|
12
|
+
BetaRawMessageStartEvent,
|
|
13
|
+
BetaRawMessageStopEvent,
|
|
14
|
+
BetaRawMessageStreamEvent,
|
|
15
|
+
BetaRedactedThinkingBlock,
|
|
16
|
+
BetaSignatureDelta,
|
|
17
|
+
BetaTextBlock,
|
|
18
|
+
BetaTextDelta,
|
|
19
|
+
BetaThinkingBlock,
|
|
20
|
+
BetaThinkingDelta,
|
|
21
|
+
BetaToolUseBlock,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
|
25
|
+
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
|
26
|
+
from letta.log import get_logger
|
|
27
|
+
from letta.schemas.letta_message import (
|
|
28
|
+
AssistantMessage,
|
|
29
|
+
HiddenReasoningMessage,
|
|
30
|
+
LettaMessage,
|
|
31
|
+
ReasoningMessage,
|
|
32
|
+
ToolCallDelta,
|
|
33
|
+
ToolCallMessage,
|
|
34
|
+
)
|
|
35
|
+
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
|
36
|
+
from letta.schemas.message import Message
|
|
37
|
+
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
|
38
|
+
from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
|
|
39
|
+
|
|
40
|
+
logger = get_logger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# TODO: These modes aren't used right now - but can be useful if we do multiple sequential tool calls within one Claude message
|
|
44
|
+
class EventMode(Enum):
|
|
45
|
+
TEXT = "TEXT"
|
|
46
|
+
TOOL_USE = "TOOL_USE"
|
|
47
|
+
THINKING = "THINKING"
|
|
48
|
+
REDACTED_THINKING = "REDACTED_THINKING"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AnthropicStreamingInterface:
|
|
52
|
+
"""
|
|
53
|
+
Encapsulates the logic for streaming responses from Anthropic.
|
|
54
|
+
This class handles parsing of partial tokens, pre-execution messages,
|
|
55
|
+
and detection of tool call events.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
|
|
59
|
+
self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
|
|
60
|
+
self.use_assistant_message = use_assistant_message
|
|
61
|
+
|
|
62
|
+
# Premake IDs for database writes
|
|
63
|
+
self.letta_assistant_message_id = Message.generate_id()
|
|
64
|
+
self.letta_tool_message_id = Message.generate_id()
|
|
65
|
+
|
|
66
|
+
self.anthropic_mode = None
|
|
67
|
+
self.message_id = None
|
|
68
|
+
self.accumulated_inner_thoughts = []
|
|
69
|
+
self.tool_call_id = None
|
|
70
|
+
self.tool_call_name = None
|
|
71
|
+
self.accumulated_tool_call_args = []
|
|
72
|
+
self.previous_parse = {}
|
|
73
|
+
|
|
74
|
+
# usage trackers
|
|
75
|
+
self.input_tokens = 0
|
|
76
|
+
self.output_tokens = 0
|
|
77
|
+
|
|
78
|
+
# reasoning object trackers
|
|
79
|
+
self.reasoning_messages = []
|
|
80
|
+
|
|
81
|
+
# Buffer to hold tool call messages until inner thoughts are complete
|
|
82
|
+
self.tool_call_buffer = []
|
|
83
|
+
self.inner_thoughts_complete = False
|
|
84
|
+
self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
|
|
85
|
+
|
|
86
|
+
def get_tool_call_object(self) -> ToolCall:
|
|
87
|
+
"""Useful for agent loop"""
|
|
88
|
+
return ToolCall(
|
|
89
|
+
id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
|
|
93
|
+
"""
|
|
94
|
+
Check if inner thoughts are complete in the current tool call arguments
|
|
95
|
+
by checking that the optimistically parsed arguments contain the inner thoughts field plus at least one other key
|
|
96
|
+
"""
|
|
97
|
+
if not self.put_inner_thoughts_in_kwarg:
|
|
98
|
+
# Inner thoughts are not placed in tool kwargs in this mode, so there is nothing to wait for
|
|
99
|
+
return True
|
|
100
|
+
else:
|
|
101
|
+
parsed = self.optimistic_json_parser.parse(combined_args)
|
|
102
|
+
# TODO: This will break on tools with 0 input
|
|
103
|
+
return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
|
|
104
|
+
|
|
105
|
+
async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
|
|
106
|
+
async with stream:
|
|
107
|
+
async for event in stream:
|
|
108
|
+
# TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
|
|
109
|
+
if isinstance(event, BetaRawContentBlockStartEvent):
|
|
110
|
+
content = event.content_block
|
|
111
|
+
|
|
112
|
+
if isinstance(content, BetaTextBlock):
|
|
113
|
+
self.anthropic_mode = EventMode.TEXT
|
|
114
|
+
# TODO: Can capture citations, etc.
|
|
115
|
+
elif isinstance(content, BetaToolUseBlock):
|
|
116
|
+
self.anthropic_mode = EventMode.TOOL_USE
|
|
117
|
+
self.tool_call_id = content.id
|
|
118
|
+
self.tool_call_name = content.name
|
|
119
|
+
self.inner_thoughts_complete = False
|
|
120
|
+
|
|
121
|
+
if not self.use_assistant_message:
|
|
122
|
+
# Buffer the initial tool call message instead of yielding immediately
|
|
123
|
+
tool_call_msg = ToolCallMessage(
|
|
124
|
+
id=self.letta_tool_message_id,
|
|
125
|
+
tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
|
|
126
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
127
|
+
)
|
|
128
|
+
self.tool_call_buffer.append(tool_call_msg)
|
|
129
|
+
elif isinstance(content, BetaThinkingBlock):
|
|
130
|
+
self.anthropic_mode = EventMode.THINKING
|
|
131
|
+
# TODO: Can capture signature, etc.
|
|
132
|
+
elif isinstance(content, BetaRedactedThinkingBlock):
|
|
133
|
+
self.anthropic_mode = EventMode.REDACTED_THINKING
|
|
134
|
+
|
|
135
|
+
hidden_reasoning_message = HiddenReasoningMessage(
|
|
136
|
+
id=self.letta_assistant_message_id,
|
|
137
|
+
state="redacted",
|
|
138
|
+
hidden_reasoning=content.data,
|
|
139
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
140
|
+
)
|
|
141
|
+
self.reasoning_messages.append(hidden_reasoning_message)
|
|
142
|
+
yield hidden_reasoning_message
|
|
143
|
+
|
|
144
|
+
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
|
145
|
+
delta = event.delta
|
|
146
|
+
|
|
147
|
+
if isinstance(delta, BetaTextDelta):
|
|
148
|
+
# Safety check
|
|
149
|
+
if not self.anthropic_mode == EventMode.TEXT:
|
|
150
|
+
raise RuntimeError(
|
|
151
|
+
f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# TODO: Strip out </thinking> more robustly, this is pretty hacky lol
|
|
155
|
+
delta.text = delta.text.replace("</thinking>", "")
|
|
156
|
+
self.accumulated_inner_thoughts.append(delta.text)
|
|
157
|
+
|
|
158
|
+
reasoning_message = ReasoningMessage(
|
|
159
|
+
id=self.letta_assistant_message_id,
|
|
160
|
+
reasoning=self.accumulated_inner_thoughts[-1],
|
|
161
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
162
|
+
)
|
|
163
|
+
self.reasoning_messages.append(reasoning_message)
|
|
164
|
+
yield reasoning_message
|
|
165
|
+
|
|
166
|
+
elif isinstance(delta, BetaInputJSONDelta):
|
|
167
|
+
if not self.anthropic_mode == EventMode.TOOL_USE:
|
|
168
|
+
raise RuntimeError(
|
|
169
|
+
f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
self.accumulated_tool_call_args.append(delta.partial_json)
|
|
173
|
+
combined_args = "".join(self.accumulated_tool_call_args)
|
|
174
|
+
current_parsed = self.optimistic_json_parser.parse(combined_args)
|
|
175
|
+
|
|
176
|
+
# Start detecting a difference in inner thoughts
|
|
177
|
+
previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
|
|
178
|
+
current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
|
|
179
|
+
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
|
180
|
+
|
|
181
|
+
if inner_thoughts_diff:
|
|
182
|
+
reasoning_message = ReasoningMessage(
|
|
183
|
+
id=self.letta_assistant_message_id,
|
|
184
|
+
reasoning=inner_thoughts_diff,
|
|
185
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
186
|
+
)
|
|
187
|
+
self.reasoning_messages.append(reasoning_message)
|
|
188
|
+
yield reasoning_message
|
|
189
|
+
|
|
190
|
+
# Check if inner thoughts are complete - if so, flush the buffer
|
|
191
|
+
if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(combined_args):
|
|
192
|
+
self.inner_thoughts_complete = True
|
|
193
|
+
# Flush all buffered tool call messages
|
|
194
|
+
for buffered_msg in self.tool_call_buffer:
|
|
195
|
+
yield buffered_msg
|
|
196
|
+
self.tool_call_buffer = []
|
|
197
|
+
|
|
198
|
+
# Start detecting special case of "send_message"
|
|
199
|
+
if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
|
|
200
|
+
previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
|
201
|
+
current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
|
202
|
+
send_message_diff = current_send_message[len(previous_send_message) :]
|
|
203
|
+
|
|
204
|
+
# Only stream out if it's not an empty string
|
|
205
|
+
if send_message_diff:
|
|
206
|
+
yield AssistantMessage(
|
|
207
|
+
id=self.letta_assistant_message_id,
|
|
208
|
+
content=[TextContent(text=send_message_diff)],
|
|
209
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
210
|
+
)
|
|
211
|
+
else:
|
|
212
|
+
# Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
|
|
213
|
+
tool_call_msg = ToolCallMessage(
|
|
214
|
+
id=self.letta_tool_message_id,
|
|
215
|
+
tool_call=ToolCallDelta(arguments=delta.partial_json),
|
|
216
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
if self.inner_thoughts_complete:
|
|
220
|
+
yield tool_call_msg
|
|
221
|
+
else:
|
|
222
|
+
self.tool_call_buffer.append(tool_call_msg)
|
|
223
|
+
|
|
224
|
+
# Set previous parse
|
|
225
|
+
self.previous_parse = current_parsed
|
|
226
|
+
elif isinstance(delta, BetaThinkingDelta):
|
|
227
|
+
# Safety check
|
|
228
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
|
229
|
+
raise RuntimeError(
|
|
230
|
+
f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
reasoning_message = ReasoningMessage(
|
|
234
|
+
id=self.letta_assistant_message_id,
|
|
235
|
+
source="reasoner_model",
|
|
236
|
+
reasoning=delta.thinking,
|
|
237
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
238
|
+
)
|
|
239
|
+
self.reasoning_messages.append(reasoning_message)
|
|
240
|
+
yield reasoning_message
|
|
241
|
+
elif isinstance(delta, BetaSignatureDelta):
|
|
242
|
+
# Safety check
|
|
243
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
|
244
|
+
raise RuntimeError(
|
|
245
|
+
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
reasoning_message = ReasoningMessage(
|
|
249
|
+
id=self.letta_assistant_message_id,
|
|
250
|
+
source="reasoner_model",
|
|
251
|
+
reasoning="",
|
|
252
|
+
date=datetime.now(timezone.utc).isoformat(),
|
|
253
|
+
signature=delta.signature,
|
|
254
|
+
)
|
|
255
|
+
self.reasoning_messages.append(reasoning_message)
|
|
256
|
+
yield reasoning_message
|
|
257
|
+
elif isinstance(event, BetaRawMessageStartEvent):
|
|
258
|
+
self.message_id = event.message.id
|
|
259
|
+
self.input_tokens += event.message.usage.input_tokens
|
|
260
|
+
self.output_tokens += event.message.usage.output_tokens
|
|
261
|
+
elif isinstance(event, BetaRawMessageDeltaEvent):
|
|
262
|
+
self.output_tokens += event.usage.output_tokens
|
|
263
|
+
elif isinstance(event, BetaRawMessageStopEvent):
|
|
264
|
+
# Don't do anything here! We don't want to stop the stream.
|
|
265
|
+
pass
|
|
266
|
+
elif isinstance(event, BetaRawContentBlockStopEvent):
|
|
267
|
+
# If we're exiting a tool use block and there are still buffered messages,
|
|
268
|
+
# we should flush them now
|
|
269
|
+
if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
|
|
270
|
+
for buffered_msg in self.tool_call_buffer:
|
|
271
|
+
yield buffered_msg
|
|
272
|
+
self.tool_call_buffer = []
|
|
273
|
+
|
|
274
|
+
self.anthropic_mode = None
|
|
275
|
+
|
|
276
|
+
def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
|
|
277
|
+
def _process_group(
|
|
278
|
+
group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
|
|
279
|
+
) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
|
|
280
|
+
if group_type == "reasoning":
|
|
281
|
+
reasoning_text = "".join(chunk.reasoning for chunk in group)
|
|
282
|
+
is_native = any(chunk.source == "reasoner_model" for chunk in group)
|
|
283
|
+
signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
|
|
284
|
+
if is_native:
|
|
285
|
+
return ReasoningContent(is_native=is_native, reasoning=reasoning_text, signature=signature)
|
|
286
|
+
else:
|
|
287
|
+
return TextContent(text=reasoning_text)
|
|
288
|
+
elif group_type == "redacted":
|
|
289
|
+
redacted_text = "".join(chunk.hidden_reasoning for chunk in group if chunk.hidden_reasoning is not None)
|
|
290
|
+
return RedactedReasoningContent(data=redacted_text)
|
|
291
|
+
else:
|
|
292
|
+
raise ValueError("Unexpected group type")
|
|
293
|
+
|
|
294
|
+
merged = []
|
|
295
|
+
current_group = []
|
|
296
|
+
current_group_type = None # "reasoning" or "redacted"
|
|
297
|
+
|
|
298
|
+
for msg in self.reasoning_messages:
|
|
299
|
+
# Determine the type of the current message
|
|
300
|
+
if isinstance(msg, HiddenReasoningMessage):
|
|
301
|
+
msg_type = "redacted"
|
|
302
|
+
elif isinstance(msg, ReasoningMessage):
|
|
303
|
+
msg_type = "reasoning"
|
|
304
|
+
else:
|
|
305
|
+
raise ValueError("Unexpected message type")
|
|
306
|
+
|
|
307
|
+
# Initialize group type if not set
|
|
308
|
+
if current_group_type is None:
|
|
309
|
+
current_group_type = msg_type
|
|
310
|
+
|
|
311
|
+
# If the type changes, process the current group
|
|
312
|
+
if msg_type != current_group_type:
|
|
313
|
+
merged.append(_process_group(current_group, current_group_type))
|
|
314
|
+
current_group = []
|
|
315
|
+
current_group_type = msg_type
|
|
316
|
+
|
|
317
|
+
current_group.append(msg)
|
|
318
|
+
|
|
319
|
+
# Process the final group, if any.
|
|
320
|
+
if current_group:
|
|
321
|
+
merged.append(_process_group(current_group, current_group_type))
|
|
322
|
+
|
|
323
|
+
# Strip out XML from any text content fields
|
|
324
|
+
for content in merged:
|
|
325
|
+
if isinstance(content, TextContent) and content.text.endswith("</thinking>"):
|
|
326
|
+
cutoff = len(content.text) - len("</thinking>")
|
|
327
|
+
content.text = content.text[:cutoff]
|
|
328
|
+
|
|
329
|
+
return merged
|
letta/llm_api/anthropic.py
CHANGED
|
@@ -930,6 +930,8 @@ def anthropic_chat_completions_process_stream(
|
|
|
930
930
|
stream_interface.stream_start()
|
|
931
931
|
|
|
932
932
|
completion_tokens = 0
|
|
933
|
+
prev_message_type = None
|
|
934
|
+
message_idx = 0
|
|
933
935
|
try:
|
|
934
936
|
for chunk_idx, chat_completion_chunk in enumerate(
|
|
935
937
|
anthropic_chat_completions_request_stream(
|
|
@@ -945,7 +947,7 @@ def anthropic_chat_completions_process_stream(
|
|
|
945
947
|
|
|
946
948
|
if stream_interface:
|
|
947
949
|
if isinstance(stream_interface, AgentChunkStreamingInterface):
|
|
948
|
-
stream_interface.process_chunk(
|
|
950
|
+
message_type = stream_interface.process_chunk(
|
|
949
951
|
chat_completion_chunk,
|
|
950
952
|
message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
|
|
951
953
|
message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
|
|
@@ -953,7 +955,11 @@ def anthropic_chat_completions_process_stream(
|
|
|
953
955
|
# TODO handle emitting redacted reasoning content (e.g. as concat?)
|
|
954
956
|
expect_reasoning_content=extended_thinking,
|
|
955
957
|
name=name,
|
|
958
|
+
message_index=message_idx,
|
|
956
959
|
)
|
|
960
|
+
if message_type != prev_message_type and message_type is not None:
|
|
961
|
+
message_idx += 1
|
|
962
|
+
prev_message_type = message_type
|
|
957
963
|
elif isinstance(stream_interface, AgentRefreshStreamingInterface):
|
|
958
964
|
stream_interface.process_refresh(chat_completion_response)
|
|
959
965
|
else:
|
|
@@ -1107,4 +1113,9 @@ def anthropic_chat_completions_process_stream(
|
|
|
1107
1113
|
|
|
1108
1114
|
log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
|
|
1109
1115
|
|
|
1116
|
+
for choice in chat_completion_response.choices:
|
|
1117
|
+
if choice.message.content is not None:
|
|
1118
|
+
choice.message.content = choice.message.content.replace(f"<{inner_thoughts_xml_tag}>", "")
|
|
1119
|
+
choice.message.content = choice.message.content.replace(f"</{inner_thoughts_xml_tag}>", "")
|
|
1120
|
+
|
|
1110
1121
|
return chat_completion_response
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
|
-
from typing import List, Optional, Union
|
|
3
|
+
from typing import Dict, List, Optional, Union
|
|
4
4
|
|
|
5
5
|
import anthropic
|
|
6
|
+
from anthropic import AsyncStream
|
|
6
7
|
from anthropic.types import Message as AnthropicMessage
|
|
8
|
+
from anthropic.types.beta import BetaRawMessageStreamEvent
|
|
9
|
+
from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
|
|
10
|
+
from anthropic.types.beta.messages import BetaMessageBatch
|
|
11
|
+
from anthropic.types.beta.messages.batch_create_params import Request
|
|
7
12
|
|
|
8
13
|
from letta.errors import (
|
|
9
14
|
ContextWindowExceededError,
|
|
@@ -28,6 +33,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
|
|
28
33
|
from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
|
|
29
34
|
from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
|
|
30
35
|
from letta.services.provider_manager import ProviderManager
|
|
36
|
+
from letta.tracing import trace_method
|
|
31
37
|
|
|
32
38
|
DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
|
|
33
39
|
|
|
@@ -46,19 +52,49 @@ class AnthropicClient(LLMClientBase):
|
|
|
46
52
|
response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
|
|
47
53
|
return response.model_dump()
|
|
48
54
|
|
|
55
|
+
@trace_method
|
|
56
|
+
async def stream_async(self, request_data: dict) -> AsyncStream[BetaRawMessageStreamEvent]:
|
|
57
|
+
client = self._get_anthropic_client(async_client=True)
|
|
58
|
+
request_data["stream"] = True
|
|
59
|
+
return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
|
|
60
|
+
|
|
61
|
+
@trace_method
|
|
62
|
+
async def batch_async(self, requests: Dict[str, dict]) -> BetaMessageBatch:
|
|
63
|
+
"""
|
|
64
|
+
Send a batch of requests to the Anthropic API asynchronously.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
requests (Dict[str, dict]): A mapping from custom_id to request parameter dicts.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
BetaMessageBatch: The batch object returned by the Anthropic message batches endpoint for the submitted requests.
|
|
71
|
+
"""
|
|
72
|
+
client = self._get_anthropic_client(async_client=True)
|
|
73
|
+
|
|
74
|
+
anthropic_requests = [
|
|
75
|
+
Request(custom_id=custom_id, params=MessageCreateParamsNonStreaming(**params)) for custom_id, params in requests.items()
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)
|
|
79
|
+
|
|
80
|
+
return batch_response
|
|
81
|
+
|
|
82
|
+
@trace_method
|
|
49
83
|
def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
|
|
50
84
|
override_key = ProviderManager().get_anthropic_override_key()
|
|
51
85
|
if async_client:
|
|
52
86
|
return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
|
|
53
87
|
return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
|
|
54
88
|
|
|
89
|
+
@trace_method
|
|
55
90
|
def build_request_data(
|
|
56
91
|
self,
|
|
57
92
|
messages: List[PydanticMessage],
|
|
58
93
|
tools: List[dict],
|
|
59
|
-
tool_call: Optional[str],
|
|
60
94
|
force_tool_call: Optional[str] = None,
|
|
61
95
|
) -> dict:
|
|
96
|
+
# TODO: This needs to get cleaned up. The logic here is pretty confusing.
|
|
97
|
+
# TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
|
|
62
98
|
prefix_fill = True
|
|
63
99
|
if not self.use_tool_naming:
|
|
64
100
|
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
|
@@ -74,11 +110,6 @@ class AnthropicClient(LLMClientBase):
|
|
|
74
110
|
|
|
75
111
|
# Extended Thinking
|
|
76
112
|
if self.llm_config.enable_reasoner:
|
|
77
|
-
assert (
|
|
78
|
-
self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
|
|
79
|
-
), "max tokens must be greater than thinking budget"
|
|
80
|
-
assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
|
|
81
|
-
|
|
82
113
|
data["thinking"] = {
|
|
83
114
|
"type": "enabled",
|
|
84
115
|
"budget_tokens": self.llm_config.max_reasoning_tokens,
|
|
@@ -90,15 +121,35 @@ class AnthropicClient(LLMClientBase):
|
|
|
90
121
|
prefix_fill = False
|
|
91
122
|
|
|
92
123
|
# Tools
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
124
|
+
# For an overview on tool choice:
|
|
125
|
+
# https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview
|
|
126
|
+
if not tools:
|
|
127
|
+
# Special case for summarization path
|
|
128
|
+
tools_for_request = None
|
|
129
|
+
tool_choice = None
|
|
130
|
+
elif force_tool_call is not None:
|
|
131
|
+
tool_choice = {"type": "tool", "name": force_tool_call}
|
|
132
|
+
tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
|
|
133
|
+
|
|
134
|
+
# need to have this setting to be able to put inner thoughts in kwargs
|
|
135
|
+
if not self.llm_config.put_inner_thoughts_in_kwargs:
|
|
136
|
+
logger.warning(
|
|
137
|
+
f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
|
|
138
|
+
)
|
|
139
|
+
self.llm_config.put_inner_thoughts_in_kwargs = True
|
|
140
|
+
else:
|
|
141
|
+
if self.llm_config.put_inner_thoughts_in_kwargs:
|
|
142
|
+
# tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
|
|
143
|
+
tool_choice = {"type": "any", "disable_parallel_tool_use": True}
|
|
144
|
+
else:
|
|
145
|
+
tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
|
|
146
|
+
tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
|
|
147
|
+
|
|
148
|
+
# Add tool choice
|
|
149
|
+
data["tool_choice"] = tool_choice
|
|
100
150
|
|
|
101
151
|
# Add inner thoughts kwarg
|
|
152
|
+
# TODO: Can probably make this more efficient
|
|
102
153
|
if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
|
|
103
154
|
tools_with_inner_thoughts = add_inner_thoughts_to_functions(
|
|
104
155
|
functions=[t.function.model_dump() for t in tools_for_request],
|
letta/llm_api/azure_openai.py
CHANGED
|
@@ -36,8 +36,8 @@ def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_versio
|
|
|
36
36
|
|
|
37
37
|
try:
|
|
38
38
|
models_list = client.models.list()
|
|
39
|
-
except
|
|
40
|
-
|
|
39
|
+
except Exception:
|
|
40
|
+
return []
|
|
41
41
|
|
|
42
42
|
all_available_models = [model.to_dict() for model in models_list.data]
|
|
43
43
|
|