letta-nightly 0.6.48.dev20250407104216__py3-none-any.whl → 0.6.49.dev20250408104230__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (87) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +47 -12
  3. letta/agents/base_agent.py +7 -4
  4. letta/agents/helpers.py +52 -0
  5. letta/agents/letta_agent.py +105 -42
  6. letta/agents/voice_agent.py +2 -2
  7. letta/constants.py +13 -1
  8. letta/errors.py +10 -3
  9. letta/functions/function_sets/base.py +65 -0
  10. letta/functions/interface.py +2 -2
  11. letta/functions/mcp_client/base_client.py +18 -1
  12. letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
  13. letta/groups/helpers.py +113 -0
  14. letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
  15. letta/groups/sleeptime_multi_agent.py +259 -0
  16. letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
  17. letta/helpers/converters.py +109 -7
  18. letta/helpers/message_helper.py +1 -0
  19. letta/helpers/tool_rule_solver.py +40 -23
  20. letta/interface.py +12 -5
  21. letta/interfaces/anthropic_streaming_interface.py +329 -0
  22. letta/llm_api/anthropic.py +12 -1
  23. letta/llm_api/anthropic_client.py +65 -14
  24. letta/llm_api/azure_openai.py +2 -2
  25. letta/llm_api/google_ai_client.py +13 -2
  26. letta/llm_api/google_constants.py +3 -0
  27. letta/llm_api/google_vertex_client.py +2 -2
  28. letta/llm_api/llm_api_tools.py +1 -1
  29. letta/llm_api/llm_client.py +7 -0
  30. letta/llm_api/llm_client_base.py +2 -7
  31. letta/llm_api/openai.py +7 -1
  32. letta/llm_api/openai_client.py +250 -0
  33. letta/orm/__init__.py +4 -0
  34. letta/orm/agent.py +6 -0
  35. letta/orm/block.py +32 -2
  36. letta/orm/block_history.py +46 -0
  37. letta/orm/custom_columns.py +60 -0
  38. letta/orm/enums.py +7 -0
  39. letta/orm/group.py +6 -0
  40. letta/orm/groups_blocks.py +13 -0
  41. letta/orm/llm_batch_items.py +55 -0
  42. letta/orm/llm_batch_job.py +48 -0
  43. letta/orm/message.py +7 -1
  44. letta/orm/organization.py +2 -0
  45. letta/orm/sqlalchemy_base.py +18 -15
  46. letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
  47. letta/prompts/system/sleeptime.txt +26 -0
  48. letta/schemas/agent.py +13 -1
  49. letta/schemas/enums.py +17 -2
  50. letta/schemas/group.py +14 -1
  51. letta/schemas/letta_message.py +5 -3
  52. letta/schemas/llm_batch_job.py +53 -0
  53. letta/schemas/llm_config.py +14 -4
  54. letta/schemas/message.py +44 -0
  55. letta/schemas/tool.py +3 -0
  56. letta/schemas/usage.py +1 -0
  57. letta/server/db.py +2 -0
  58. letta/server/rest_api/app.py +1 -1
  59. letta/server/rest_api/chat_completions_interface.py +8 -3
  60. letta/server/rest_api/interface.py +36 -7
  61. letta/server/rest_api/routers/v1/agents.py +53 -39
  62. letta/server/rest_api/routers/v1/runs.py +14 -2
  63. letta/server/rest_api/utils.py +15 -4
  64. letta/server/server.py +120 -71
  65. letta/services/agent_manager.py +70 -6
  66. letta/services/block_manager.py +190 -2
  67. letta/services/group_manager.py +68 -0
  68. letta/services/helpers/agent_manager_helper.py +6 -4
  69. letta/services/llm_batch_manager.py +139 -0
  70. letta/services/message_manager.py +17 -31
  71. letta/services/tool_executor/tool_execution_sandbox.py +1 -3
  72. letta/services/tool_executor/tool_executor.py +9 -20
  73. letta/services/tool_manager.py +14 -3
  74. letta/services/tool_sandbox/__init__.py +0 -0
  75. letta/services/tool_sandbox/base.py +188 -0
  76. letta/services/tool_sandbox/e2b_sandbox.py +116 -0
  77. letta/services/tool_sandbox/local_sandbox.py +221 -0
  78. letta/sleeptime_agent.py +61 -0
  79. letta/streaming_interface.py +20 -10
  80. letta/utils.py +4 -0
  81. {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408104230.dist-info}/METADATA +2 -2
  82. {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408104230.dist-info}/RECORD +85 -69
  83. letta/offline_memory_agent.py +0 -173
  84. letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
  85. {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408104230.dist-info}/LICENSE +0 -0
  86. {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408104230.dist-info}/WHEEL +0 -0
  87. {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408104230.dist-info}/entry_points.txt +0 -0
@@ -38,29 +38,46 @@ class ToolRulesSolver(BaseModel):
38
38
  )
39
39
  tool_call_history: List[str] = Field(default_factory=list, description="History of tool calls, updated with each tool call.")
40
40
 
41
- def __init__(self, tool_rules: List[BaseToolRule], **kwargs):
42
- super().__init__(**kwargs)
43
- # Separate the provided tool rules into init, standard, and terminal categories
44
- for rule in tool_rules:
45
- if rule.type == ToolRuleType.run_first:
46
- assert isinstance(rule, InitToolRule)
47
- self.init_tool_rules.append(rule)
48
- elif rule.type == ToolRuleType.constrain_child_tools:
49
- assert isinstance(rule, ChildToolRule)
50
- self.child_based_tool_rules.append(rule)
51
- elif rule.type == ToolRuleType.conditional:
52
- assert isinstance(rule, ConditionalToolRule)
53
- self.validate_conditional_tool(rule)
54
- self.child_based_tool_rules.append(rule)
55
- elif rule.type == ToolRuleType.exit_loop:
56
- assert isinstance(rule, TerminalToolRule)
57
- self.terminal_tool_rules.append(rule)
58
- elif rule.type == ToolRuleType.continue_loop:
59
- assert isinstance(rule, ContinueToolRule)
60
- self.continue_tool_rules.append(rule)
61
- elif rule.type == ToolRuleType.max_count_per_step:
62
- assert isinstance(rule, MaxCountPerStepToolRule)
63
- self.child_based_tool_rules.append(rule)
41
+ def __init__(
42
+ self,
43
+ tool_rules: Optional[List[BaseToolRule]] = None,
44
+ init_tool_rules: Optional[List[InitToolRule]] = None,
45
+ continue_tool_rules: Optional[List[ContinueToolRule]] = None,
46
+ child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
47
+ terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
48
+ tool_call_history: Optional[List[str]] = None,
49
+ **kwargs,
50
+ ):
51
+ super().__init__(
52
+ init_tool_rules=init_tool_rules or [],
53
+ continue_tool_rules=continue_tool_rules or [],
54
+ child_based_tool_rules=child_based_tool_rules or [],
55
+ terminal_tool_rules=terminal_tool_rules or [],
56
+ tool_call_history=tool_call_history or [],
57
+ **kwargs,
58
+ )
59
+
60
+ if tool_rules:
61
+ for rule in tool_rules:
62
+ if rule.type == ToolRuleType.run_first:
63
+ assert isinstance(rule, InitToolRule)
64
+ self.init_tool_rules.append(rule)
65
+ elif rule.type == ToolRuleType.constrain_child_tools:
66
+ assert isinstance(rule, ChildToolRule)
67
+ self.child_based_tool_rules.append(rule)
68
+ elif rule.type == ToolRuleType.conditional:
69
+ assert isinstance(rule, ConditionalToolRule)
70
+ self.validate_conditional_tool(rule)
71
+ self.child_based_tool_rules.append(rule)
72
+ elif rule.type == ToolRuleType.exit_loop:
73
+ assert isinstance(rule, TerminalToolRule)
74
+ self.terminal_tool_rules.append(rule)
75
+ elif rule.type == ToolRuleType.continue_loop:
76
+ assert isinstance(rule, ContinueToolRule)
77
+ self.continue_tool_rules.append(rule)
78
+ elif rule.type == ToolRuleType.max_count_per_step:
79
+ assert isinstance(rule, MaxCountPerStepToolRule)
80
+ self.child_based_tool_rules.append(rule)
64
81
 
65
82
  def register_tool_call(self, tool_name: str):
66
83
  """Update the internal state to track tool call history."""
letta/interface.py CHANGED
@@ -30,7 +30,7 @@ class AgentInterface(ABC):
30
30
  raise NotImplementedError
31
31
 
32
32
  @abstractmethod
33
- def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
33
+ def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
34
34
  """Letta generates some internal monologue"""
35
35
  raise NotImplementedError
36
36
 
@@ -40,7 +40,7 @@ class AgentInterface(ABC):
40
40
  raise NotImplementedError
41
41
 
42
42
  @abstractmethod
43
- def function_message(self, msg: str, msg_obj: Optional[Message] = None):
43
+ def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
44
44
  """Letta calls a function"""
45
45
  raise NotImplementedError
46
46
 
@@ -79,7 +79,7 @@ class CLIInterface(AgentInterface):
79
79
  print(fstr.format(msg=msg))
80
80
 
81
81
  @staticmethod
82
- def internal_monologue(msg: str, msg_obj: Optional[Message] = None):
82
+ def internal_monologue(msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
83
83
  # ANSI escape code for italic is '\x1B[3m'
84
84
  fstr = f"\x1B[3m{Fore.LIGHTBLACK_EX}{INNER_THOUGHTS_CLI_SYMBOL} {{msg}}{Style.RESET_ALL}"
85
85
  if STRIP_UI:
@@ -108,7 +108,14 @@ class CLIInterface(AgentInterface):
108
108
  print(fstr.format(msg=msg))
109
109
 
110
110
  @staticmethod
111
- def user_message(msg: str, msg_obj: Optional[Message] = None, raw: bool = False, dump: bool = False, debug: bool = DEBUG):
111
+ def user_message(
112
+ msg: str,
113
+ msg_obj: Optional[Message] = None,
114
+ raw: bool = False,
115
+ dump: bool = False,
116
+ debug: bool = DEBUG,
117
+ chunk_index: Optional[int] = None,
118
+ ):
112
119
  def print_user_message(icon, msg, printf=print):
113
120
  if STRIP_UI:
114
121
  printf(f"{icon} {msg}")
@@ -154,7 +161,7 @@ class CLIInterface(AgentInterface):
154
161
  printd_user_message("🧑", msg_json)
155
162
 
156
163
  @staticmethod
157
- def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG):
164
+ def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG, chunk_index: Optional[int] = None):
158
165
  def print_function_message(icon, msg, color=Fore.RED, printf=print):
159
166
  if STRIP_UI:
160
167
  printf(f"⚡{icon} [function] {msg}")
@@ -0,0 +1,329 @@
1
+ from datetime import datetime, timezone
2
+ from enum import Enum
3
+ from typing import AsyncGenerator, List, Union
4
+
5
+ from anthropic import AsyncStream
6
+ from anthropic.types.beta import (
7
+ BetaInputJSONDelta,
8
+ BetaRawContentBlockDeltaEvent,
9
+ BetaRawContentBlockStartEvent,
10
+ BetaRawContentBlockStopEvent,
11
+ BetaRawMessageDeltaEvent,
12
+ BetaRawMessageStartEvent,
13
+ BetaRawMessageStopEvent,
14
+ BetaRawMessageStreamEvent,
15
+ BetaRedactedThinkingBlock,
16
+ BetaSignatureDelta,
17
+ BetaTextBlock,
18
+ BetaTextDelta,
19
+ BetaThinkingBlock,
20
+ BetaThinkingDelta,
21
+ BetaToolUseBlock,
22
+ )
23
+
24
+ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
25
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
26
+ from letta.log import get_logger
27
+ from letta.schemas.letta_message import (
28
+ AssistantMessage,
29
+ HiddenReasoningMessage,
30
+ LettaMessage,
31
+ ReasoningMessage,
32
+ ToolCallDelta,
33
+ ToolCallMessage,
34
+ )
35
+ from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
36
+ from letta.schemas.message import Message
37
+ from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
38
+ from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
39
+
40
+ logger = get_logger(__name__)
41
+
42
+
43
+ # TODO: These modes aren't used right now - but can be useful if we do multiple sequential tool calling within one Claude message
44
+ class EventMode(Enum):
45
+ TEXT = "TEXT"
46
+ TOOL_USE = "TOOL_USE"
47
+ THINKING = "THINKING"
48
+ REDACTED_THINKING = "REDACTED_THINKING"
49
+
50
+
51
+ class AnthropicStreamingInterface:
52
+ """
53
+ Encapsulates the logic for streaming responses from Anthropic.
54
+ This class handles parsing of partial tokens, pre-execution messages,
55
+ and detection of tool call events.
56
+ """
57
+
58
+ def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
59
+ self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
60
+ self.use_assistant_message = use_assistant_message
61
+
62
+ # Premake IDs for database writes
63
+ self.letta_assistant_message_id = Message.generate_id()
64
+ self.letta_tool_message_id = Message.generate_id()
65
+
66
+ self.anthropic_mode = None
67
+ self.message_id = None
68
+ self.accumulated_inner_thoughts = []
69
+ self.tool_call_id = None
70
+ self.tool_call_name = None
71
+ self.accumulated_tool_call_args = []
72
+ self.previous_parse = {}
73
+
74
+ # usage trackers
75
+ self.input_tokens = 0
76
+ self.output_tokens = 0
77
+
78
+ # reasoning object trackers
79
+ self.reasoning_messages = []
80
+
81
+ # Buffer to hold tool call messages until inner thoughts are complete
82
+ self.tool_call_buffer = []
83
+ self.inner_thoughts_complete = False
84
+ self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
85
+
86
+ def get_tool_call_object(self) -> ToolCall:
87
+ """Useful for agent loop"""
88
+ return ToolCall(
89
+ id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
90
+ )
91
+
92
+ def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
93
+ """
94
+ Check if inner thoughts are complete in the current tool call arguments
95
+ by checking that the optimistically parsed arguments contain the inner thoughts kwarg plus at least one other key
96
+ """
97
+ if not self.put_inner_thoughts_in_kwarg:
98
+ # Inner thoughts are not placed in tool call kwargs, so there is nothing to wait for
99
+ return True
100
+ else:
101
+ parsed = self.optimistic_json_parser.parse(combined_args)
102
+ # TODO: This will break on tools with 0 input
103
+ return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
104
+
105
+ async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
106
+ async with stream:
107
+ async for event in stream:
108
+ # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
109
+ if isinstance(event, BetaRawContentBlockStartEvent):
110
+ content = event.content_block
111
+
112
+ if isinstance(content, BetaTextBlock):
113
+ self.anthropic_mode = EventMode.TEXT
114
+ # TODO: Can capture citations, etc.
115
+ elif isinstance(content, BetaToolUseBlock):
116
+ self.anthropic_mode = EventMode.TOOL_USE
117
+ self.tool_call_id = content.id
118
+ self.tool_call_name = content.name
119
+ self.inner_thoughts_complete = False
120
+
121
+ if not self.use_assistant_message:
122
+ # Buffer the initial tool call message instead of yielding immediately
123
+ tool_call_msg = ToolCallMessage(
124
+ id=self.letta_tool_message_id,
125
+ tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
126
+ date=datetime.now(timezone.utc).isoformat(),
127
+ )
128
+ self.tool_call_buffer.append(tool_call_msg)
129
+ elif isinstance(content, BetaThinkingBlock):
130
+ self.anthropic_mode = EventMode.THINKING
131
+ # TODO: Can capture signature, etc.
132
+ elif isinstance(content, BetaRedactedThinkingBlock):
133
+ self.anthropic_mode = EventMode.REDACTED_THINKING
134
+
135
+ hidden_reasoning_message = HiddenReasoningMessage(
136
+ id=self.letta_assistant_message_id,
137
+ state="redacted",
138
+ hidden_reasoning=content.data,
139
+ date=datetime.now(timezone.utc).isoformat(),
140
+ )
141
+ self.reasoning_messages.append(hidden_reasoning_message)
142
+ yield hidden_reasoning_message
143
+
144
+ elif isinstance(event, BetaRawContentBlockDeltaEvent):
145
+ delta = event.delta
146
+
147
+ if isinstance(delta, BetaTextDelta):
148
+ # Safety check
149
+ if not self.anthropic_mode == EventMode.TEXT:
150
+ raise RuntimeError(
151
+ f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
152
+ )
153
+
154
+ # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
155
+ delta.text = delta.text.replace("</thinking>", "")
156
+ self.accumulated_inner_thoughts.append(delta.text)
157
+
158
+ reasoning_message = ReasoningMessage(
159
+ id=self.letta_assistant_message_id,
160
+ reasoning=self.accumulated_inner_thoughts[-1],
161
+ date=datetime.now(timezone.utc).isoformat(),
162
+ )
163
+ self.reasoning_messages.append(reasoning_message)
164
+ yield reasoning_message
165
+
166
+ elif isinstance(delta, BetaInputJSONDelta):
167
+ if not self.anthropic_mode == EventMode.TOOL_USE:
168
+ raise RuntimeError(
169
+ f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
170
+ )
171
+
172
+ self.accumulated_tool_call_args.append(delta.partial_json)
173
+ combined_args = "".join(self.accumulated_tool_call_args)
174
+ current_parsed = self.optimistic_json_parser.parse(combined_args)
175
+
176
+ # Start detecting a difference in inner thoughts
177
+ previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
178
+ current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
179
+ inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
180
+
181
+ if inner_thoughts_diff:
182
+ reasoning_message = ReasoningMessage(
183
+ id=self.letta_assistant_message_id,
184
+ reasoning=inner_thoughts_diff,
185
+ date=datetime.now(timezone.utc).isoformat(),
186
+ )
187
+ self.reasoning_messages.append(reasoning_message)
188
+ yield reasoning_message
189
+
190
+ # Check if inner thoughts are complete - if so, flush the buffer
191
+ if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(combined_args):
192
+ self.inner_thoughts_complete = True
193
+ # Flush all buffered tool call messages
194
+ for buffered_msg in self.tool_call_buffer:
195
+ yield buffered_msg
196
+ self.tool_call_buffer = []
197
+
198
+ # Start detecting special case of "send_message"
199
+ if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
200
+ previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
201
+ current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
202
+ send_message_diff = current_send_message[len(previous_send_message) :]
203
+
204
+ # Only stream out if it's not an empty string
205
+ if send_message_diff:
206
+ yield AssistantMessage(
207
+ id=self.letta_assistant_message_id,
208
+ content=[TextContent(text=send_message_diff)],
209
+ date=datetime.now(timezone.utc).isoformat(),
210
+ )
211
+ else:
212
+ # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
213
+ tool_call_msg = ToolCallMessage(
214
+ id=self.letta_tool_message_id,
215
+ tool_call=ToolCallDelta(arguments=delta.partial_json),
216
+ date=datetime.now(timezone.utc).isoformat(),
217
+ )
218
+
219
+ if self.inner_thoughts_complete:
220
+ yield tool_call_msg
221
+ else:
222
+ self.tool_call_buffer.append(tool_call_msg)
223
+
224
+ # Set previous parse
225
+ self.previous_parse = current_parsed
226
+ elif isinstance(delta, BetaThinkingDelta):
227
+ # Safety check
228
+ if not self.anthropic_mode == EventMode.THINKING:
229
+ raise RuntimeError(
230
+ f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
231
+ )
232
+
233
+ reasoning_message = ReasoningMessage(
234
+ id=self.letta_assistant_message_id,
235
+ source="reasoner_model",
236
+ reasoning=delta.thinking,
237
+ date=datetime.now(timezone.utc).isoformat(),
238
+ )
239
+ self.reasoning_messages.append(reasoning_message)
240
+ yield reasoning_message
241
+ elif isinstance(delta, BetaSignatureDelta):
242
+ # Safety check
243
+ if not self.anthropic_mode == EventMode.THINKING:
244
+ raise RuntimeError(
245
+ f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
246
+ )
247
+
248
+ reasoning_message = ReasoningMessage(
249
+ id=self.letta_assistant_message_id,
250
+ source="reasoner_model",
251
+ reasoning="",
252
+ date=datetime.now(timezone.utc).isoformat(),
253
+ signature=delta.signature,
254
+ )
255
+ self.reasoning_messages.append(reasoning_message)
256
+ yield reasoning_message
257
+ elif isinstance(event, BetaRawMessageStartEvent):
258
+ self.message_id = event.message.id
259
+ self.input_tokens += event.message.usage.input_tokens
260
+ self.output_tokens += event.message.usage.output_tokens
261
+ elif isinstance(event, BetaRawMessageDeltaEvent):
262
+ self.output_tokens += event.usage.output_tokens
263
+ elif isinstance(event, BetaRawMessageStopEvent):
264
+ # Don't do anything here! We don't want to stop the stream.
265
+ pass
266
+ elif isinstance(event, BetaRawContentBlockStopEvent):
267
+ # If we're exiting a tool use block and there are still buffered messages,
268
+ # we should flush them now
269
+ if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
270
+ for buffered_msg in self.tool_call_buffer:
271
+ yield buffered_msg
272
+ self.tool_call_buffer = []
273
+
274
+ self.anthropic_mode = None
275
+
276
+ def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
277
+ def _process_group(
278
+ group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
279
+ ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
280
+ if group_type == "reasoning":
281
+ reasoning_text = "".join(chunk.reasoning for chunk in group)
282
+ is_native = any(chunk.source == "reasoner_model" for chunk in group)
283
+ signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
284
+ if is_native:
285
+ return ReasoningContent(is_native=is_native, reasoning=reasoning_text, signature=signature)
286
+ else:
287
+ return TextContent(text=reasoning_text)
288
+ elif group_type == "redacted":
289
+ redacted_text = "".join(chunk.hidden_reasoning for chunk in group if chunk.hidden_reasoning is not None)
290
+ return RedactedReasoningContent(data=redacted_text)
291
+ else:
292
+ raise ValueError("Unexpected group type")
293
+
294
+ merged = []
295
+ current_group = []
296
+ current_group_type = None # "reasoning" or "redacted"
297
+
298
+ for msg in self.reasoning_messages:
299
+ # Determine the type of the current message
300
+ if isinstance(msg, HiddenReasoningMessage):
301
+ msg_type = "redacted"
302
+ elif isinstance(msg, ReasoningMessage):
303
+ msg_type = "reasoning"
304
+ else:
305
+ raise ValueError("Unexpected message type")
306
+
307
+ # Initialize group type if not set
308
+ if current_group_type is None:
309
+ current_group_type = msg_type
310
+
311
+ # If the type changes, process the current group
312
+ if msg_type != current_group_type:
313
+ merged.append(_process_group(current_group, current_group_type))
314
+ current_group = []
315
+ current_group_type = msg_type
316
+
317
+ current_group.append(msg)
318
+
319
+ # Process the final group, if any.
320
+ if current_group:
321
+ merged.append(_process_group(current_group, current_group_type))
322
+
323
+ # Strip out XML from any text content fields
324
+ for content in merged:
325
+ if isinstance(content, TextContent) and content.text.endswith("</thinking>"):
326
+ cutoff = len(content.text) - len("</thinking>")
327
+ content.text = content.text[:cutoff]
328
+
329
+ return merged
@@ -930,6 +930,8 @@ def anthropic_chat_completions_process_stream(
930
930
  stream_interface.stream_start()
931
931
 
932
932
  completion_tokens = 0
933
+ prev_message_type = None
934
+ message_idx = 0
933
935
  try:
934
936
  for chunk_idx, chat_completion_chunk in enumerate(
935
937
  anthropic_chat_completions_request_stream(
@@ -945,7 +947,7 @@ def anthropic_chat_completions_process_stream(
945
947
 
946
948
  if stream_interface:
947
949
  if isinstance(stream_interface, AgentChunkStreamingInterface):
948
- stream_interface.process_chunk(
950
+ message_type = stream_interface.process_chunk(
949
951
  chat_completion_chunk,
950
952
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
951
953
  message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
@@ -953,7 +955,11 @@ def anthropic_chat_completions_process_stream(
953
955
  # TODO handle emitting redacted reasoning content (e.g. as concat?)
954
956
  expect_reasoning_content=extended_thinking,
955
957
  name=name,
958
+ message_index=message_idx,
956
959
  )
960
+ if message_type != prev_message_type and message_type is not None:
961
+ message_idx += 1
962
+ prev_message_type = message_type
957
963
  elif isinstance(stream_interface, AgentRefreshStreamingInterface):
958
964
  stream_interface.process_refresh(chat_completion_response)
959
965
  else:
@@ -1107,4 +1113,9 @@ def anthropic_chat_completions_process_stream(
1107
1113
 
1108
1114
  log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
1109
1115
 
1116
+ for choice in chat_completion_response.choices:
1117
+ if choice.message.content is not None:
1118
+ choice.message.content = choice.message.content.replace(f"<{inner_thoughts_xml_tag}>", "")
1119
+ choice.message.content = choice.message.content.replace(f"</{inner_thoughts_xml_tag}>", "")
1120
+
1110
1121
  return chat_completion_response
@@ -1,9 +1,14 @@
1
1
  import json
2
2
  import re
3
- from typing import List, Optional, Union
3
+ from typing import Dict, List, Optional, Union
4
4
 
5
5
  import anthropic
6
+ from anthropic import AsyncStream
6
7
  from anthropic.types import Message as AnthropicMessage
8
+ from anthropic.types.beta import BetaRawMessageStreamEvent
9
+ from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
10
+ from anthropic.types.beta.messages import BetaMessageBatch
11
+ from anthropic.types.beta.messages.batch_create_params import Request
7
12
 
8
13
  from letta.errors import (
9
14
  ContextWindowExceededError,
@@ -28,6 +33,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
28
33
  from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
29
34
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
30
35
  from letta.services.provider_manager import ProviderManager
36
+ from letta.tracing import trace_method
31
37
 
32
38
  DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
33
39
 
@@ -46,19 +52,49 @@ class AnthropicClient(LLMClientBase):
46
52
  response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
47
53
  return response.model_dump()
48
54
 
55
+ @trace_method
56
+ async def stream_async(self, request_data: dict) -> AsyncStream[BetaRawMessageStreamEvent]:
57
+ client = self._get_anthropic_client(async_client=True)
58
+ request_data["stream"] = True
59
+ return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
60
+
61
+ @trace_method
62
+ async def batch_async(self, requests: Dict[str, dict]) -> BetaMessageBatch:
63
+ """
64
+ Send a batch of requests to the Anthropic API asynchronously.
65
+
66
+ Args:
67
+ requests (Dict[str, dict]): A mapping from custom_id to request parameter dicts.
68
+
69
+ Returns:
70
+ BetaMessageBatch: The batch object returned by the Anthropic API for the submitted requests.
71
+ """
72
+ client = self._get_anthropic_client(async_client=True)
73
+
74
+ anthropic_requests = [
75
+ Request(custom_id=custom_id, params=MessageCreateParamsNonStreaming(**params)) for custom_id, params in requests.items()
76
+ ]
77
+
78
+ batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)
79
+
80
+ return batch_response
81
+
82
+ @trace_method
49
83
  def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
50
84
  override_key = ProviderManager().get_anthropic_override_key()
51
85
  if async_client:
52
86
  return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
53
87
  return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
54
88
 
89
+ @trace_method
55
90
  def build_request_data(
56
91
  self,
57
92
  messages: List[PydanticMessage],
58
93
  tools: List[dict],
59
- tool_call: Optional[str],
60
94
  force_tool_call: Optional[str] = None,
61
95
  ) -> dict:
96
+ # TODO: This needs to get cleaned up. The logic here is pretty confusing.
97
+ # TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
62
98
  prefix_fill = True
63
99
  if not self.use_tool_naming:
64
100
  raise NotImplementedError("Only tool calling supported on Anthropic API requests")
@@ -74,11 +110,6 @@ class AnthropicClient(LLMClientBase):
74
110
 
75
111
  # Extended Thinking
76
112
  if self.llm_config.enable_reasoner:
77
- assert (
78
- self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
79
- ), "max tokens must be greater than thinking budget"
80
- assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
81
-
82
113
  data["thinking"] = {
83
114
  "type": "enabled",
84
115
  "budget_tokens": self.llm_config.max_reasoning_tokens,
@@ -90,15 +121,35 @@ class AnthropicClient(LLMClientBase):
90
121
  prefix_fill = False
91
122
 
92
123
  # Tools
93
- tools_for_request = (
94
- [Tool(function=f) for f in tools if f["name"] == force_tool_call]
95
- if force_tool_call is not None
96
- else [Tool(function=f) for f in tools]
97
- )
98
- if force_tool_call is not None:
99
- self.llm_config.put_inner_thoughts_in_kwargs = True # why do we do this ?
124
+ # For an overview on tool choice:
125
+ # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview
126
+ if not tools:
127
+ # Special case for summarization path
128
+ tools_for_request = None
129
+ tool_choice = None
130
+ elif force_tool_call is not None:
131
+ tool_choice = {"type": "tool", "name": force_tool_call}
132
+ tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
133
+
134
+ # need to have this setting to be able to put inner thoughts in kwargs
135
+ if not self.llm_config.put_inner_thoughts_in_kwargs:
136
+ logger.warning(
137
+ f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
138
+ )
139
+ self.llm_config.put_inner_thoughts_in_kwargs = True
140
+ else:
141
+ if self.llm_config.put_inner_thoughts_in_kwargs:
142
+ # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
143
+ tool_choice = {"type": "any", "disable_parallel_tool_use": True}
144
+ else:
145
+ tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
146
+ tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
147
+
148
+ # Add tool choice
149
+ data["tool_choice"] = tool_choice
100
150
 
101
151
  # Add inner thoughts kwarg
152
+ # TODO: Can probably make this more efficient
102
153
  if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
103
154
  tools_with_inner_thoughts = add_inner_thoughts_to_functions(
104
155
  functions=[t.function.model_dump() for t in tools_for_request],
@@ -36,8 +36,8 @@ def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_versio
36
36
 
37
37
  try:
38
38
  models_list = client.models.list()
39
- except requests.RequestException as e:
40
- raise RuntimeError(f"Failed to retrieve model list: {e}")
39
+ except Exception:
40
+ return []
41
41
 
42
42
  all_available_models = [model.to_dict() for model in models_list.data]
43
43