letta-nightly 0.8.4.dev20250614104137__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. letta/__init__.py +1 -0
  2. letta/agents/base_agent.py +12 -1
  3. letta/agents/helpers.py +5 -2
  4. letta/agents/letta_agent.py +98 -61
  5. letta/agents/voice_sleeptime_agent.py +2 -1
  6. letta/constants.py +3 -5
  7. letta/data_sources/redis_client.py +30 -10
  8. letta/functions/function_sets/files.py +4 -4
  9. letta/functions/helpers.py +6 -1
  10. letta/functions/mcp_client/types.py +95 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +2 -1
  12. letta/helpers/decorators.py +91 -0
  13. letta/interfaces/anthropic_streaming_interface.py +11 -0
  14. letta/interfaces/openai_streaming_interface.py +244 -225
  15. letta/llm_api/openai_client.py +1 -1
  16. letta/local_llm/utils.py +5 -1
  17. letta/orm/enums.py +1 -0
  18. letta/orm/mcp_server.py +3 -0
  19. letta/orm/tool.py +3 -0
  20. letta/otel/metric_registry.py +12 -0
  21. letta/otel/metrics.py +16 -7
  22. letta/schemas/letta_response.py +6 -1
  23. letta/schemas/letta_stop_reason.py +22 -0
  24. letta/schemas/mcp.py +48 -6
  25. letta/schemas/openai/chat_completion_request.py +1 -1
  26. letta/schemas/openai/chat_completion_response.py +1 -1
  27. letta/schemas/pip_requirement.py +14 -0
  28. letta/schemas/sandbox_config.py +1 -19
  29. letta/schemas/tool.py +5 -0
  30. letta/server/rest_api/json_parser.py +39 -3
  31. letta/server/rest_api/routers/v1/tools.py +3 -1
  32. letta/server/rest_api/routers/v1/voice.py +2 -3
  33. letta/server/rest_api/utils.py +1 -1
  34. letta/server/server.py +11 -2
  35. letta/services/agent_manager.py +37 -29
  36. letta/services/helpers/tool_execution_helper.py +39 -9
  37. letta/services/mcp/base_client.py +13 -2
  38. letta/services/mcp/sse_client.py +8 -1
  39. letta/services/mcp/streamable_http_client.py +56 -0
  40. letta/services/mcp_manager.py +23 -9
  41. letta/services/message_manager.py +30 -3
  42. letta/services/tool_executor/files_tool_executor.py +2 -3
  43. letta/services/tool_sandbox/e2b_sandbox.py +53 -3
  44. letta/services/tool_sandbox/local_sandbox.py +3 -1
  45. letta/services/user_manager.py +22 -0
  46. letta/settings.py +3 -0
  47. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
  48. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
  49. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
@@ -6,13 +6,19 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
6
6
 
7
7
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
8
8
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
9
+ from letta.log import get_logger
10
+ from letta.otel.context import get_ctx_attributes
11
+ from letta.otel.metric_registry import MetricRegistry
9
12
  from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
10
13
  from letta.schemas.letta_message_content import TextContent
14
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
11
15
  from letta.schemas.message import Message
12
16
  from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
13
17
  from letta.server.rest_api.json_parser import OptimisticJSONParser
14
18
  from letta.streaming_utils import JSONInnerThoughtsExtractor
15
19
 
20
+ logger = get_logger(__name__)
21
+
16
22
 
17
23
  class OpenAIStreamingInterface:
18
24
  """
@@ -60,6 +66,8 @@ class OpenAIStreamingInterface:
60
66
  def get_tool_call_object(self) -> ToolCall:
61
67
  """Useful for agent loop"""
62
68
  function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
69
+ if not function_name:
70
+ raise ValueError("No tool call ID available")
63
71
  tool_call_id = self.last_flushed_function_id if self.last_flushed_function_id else self.function_id_buffer
64
72
  if not tool_call_id:
65
73
  raise ValueError("No tool call ID available")
@@ -79,254 +87,265 @@ class OpenAIStreamingInterface:
79
87
  It also collects tokens and detects if a tool call is triggered.
80
88
  """
81
89
  first_chunk = True
90
+ try:
91
+ async with stream:
92
+ prev_message_type = None
93
+ message_index = 0
94
+ async for chunk in stream:
95
+ if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
96
+ now = get_utc_timestamp_ns()
97
+ ttft_ns = now - provider_request_start_timestamp_ns
98
+ ttft_span.add_event(
99
+ name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
100
+ )
101
+ metric_attributes = get_ctx_attributes()
102
+ metric_attributes["model.name"] = chunk.model
103
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
104
+
105
+ first_chunk = False
106
+
107
+ if not self.model or not self.message_id:
108
+ self.model = chunk.model
109
+ self.message_id = chunk.id
110
+
111
+ # track usage
112
+ if chunk.usage:
113
+ self.input_tokens += chunk.usage.prompt_tokens
114
+ self.output_tokens += chunk.usage.completion_tokens
115
+
116
+ if chunk.choices:
117
+ choice = chunk.choices[0]
118
+ message_delta = choice.delta
119
+
120
+ if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
121
+ tool_call = message_delta.tool_calls[0]
122
+
123
+ if tool_call.function.name:
124
+ # If we're waiting for the first key, then we should hold back the name
125
+ # ie add it to a buffer instead of returning it as a chunk
126
+ if self.function_name_buffer is None:
127
+ self.function_name_buffer = tool_call.function.name
128
+ else:
129
+ self.function_name_buffer += tool_call.function.name
82
130
 
83
- async with stream:
84
- prev_message_type = None
85
- message_index = 0
86
- async for chunk in stream:
87
- if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
88
- now = get_utc_timestamp_ns()
89
- ttft_ns = now - provider_request_start_timestamp_ns
90
- ttft_span.add_event(
91
- name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
92
- )
93
- first_chunk = False
94
-
95
- if not self.model or not self.message_id:
96
- self.model = chunk.model
97
- self.message_id = chunk.id
98
-
99
- # track usage
100
- if chunk.usage:
101
- self.input_tokens += chunk.usage.prompt_tokens
102
- self.output_tokens += chunk.usage.completion_tokens
103
-
104
- if chunk.choices:
105
- choice = chunk.choices[0]
106
- message_delta = choice.delta
107
-
108
- if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
109
- tool_call = message_delta.tool_calls[0]
110
-
111
- if tool_call.function.name:
112
- # If we're waiting for the first key, then we should hold back the name
113
- # ie add it to a buffer instead of returning it as a chunk
114
- if self.function_name_buffer is None:
115
- self.function_name_buffer = tool_call.function.name
116
- else:
117
- self.function_name_buffer += tool_call.function.name
118
-
119
- if tool_call.id:
120
- # Buffer until next time
121
- if self.function_id_buffer is None:
122
- self.function_id_buffer = tool_call.id
123
- else:
124
- self.function_id_buffer += tool_call.id
125
-
126
- if tool_call.function.arguments:
127
- # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
128
- self.current_function_arguments += tool_call.function.arguments
129
- updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
130
- tool_call.function.arguments
131
- )
132
-
133
- # If we have inner thoughts, we should output them as a chunk
134
- if updates_inner_thoughts:
135
- if prev_message_type and prev_message_type != "reasoning_message":
136
- message_index += 1
137
- self.reasoning_messages.append(updates_inner_thoughts)
138
- reasoning_message = ReasoningMessage(
139
- id=self.letta_message_id,
140
- date=datetime.now(timezone.utc),
141
- reasoning=updates_inner_thoughts,
142
- # name=name,
143
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
144
- )
145
- prev_message_type = reasoning_message.message_type
146
- yield reasoning_message
147
-
148
- # Additionally inner thoughts may stream back with a chunk of main JSON
149
- # In that case, since we can only return a chunk at a time, we should buffer it
150
- if updates_main_json:
151
- if self.function_args_buffer is None:
152
- self.function_args_buffer = updates_main_json
153
- else:
154
- self.function_args_buffer += updates_main_json
131
+ if tool_call.id:
132
+ # Buffer until next time
133
+ if self.function_id_buffer is None:
134
+ self.function_id_buffer = tool_call.id
135
+ else:
136
+ self.function_id_buffer += tool_call.id
155
137
 
156
- # If we have main_json, we should output a ToolCallMessage
157
- elif updates_main_json:
138
+ if tool_call.function.arguments:
139
+ # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
140
+ self.current_function_arguments += tool_call.function.arguments
141
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
142
+ tool_call.function.arguments
143
+ )
158
144
 
159
- # If there's something in the function_name buffer, we should release it first
160
- # NOTE: we could output it as part of a chunk that has both name and args,
161
- # however the frontend may expect name first, then args, so to be
162
- # safe we'll output name first in a separate chunk
163
- if self.function_name_buffer:
145
+ # If we have inner thoughts, we should output them as a chunk
146
+ if updates_inner_thoughts:
147
+ if prev_message_type and prev_message_type != "reasoning_message":
148
+ message_index += 1
149
+ self.reasoning_messages.append(updates_inner_thoughts)
150
+ reasoning_message = ReasoningMessage(
151
+ id=self.letta_message_id,
152
+ date=datetime.now(timezone.utc),
153
+ reasoning=updates_inner_thoughts,
154
+ # name=name,
155
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
156
+ )
157
+ prev_message_type = reasoning_message.message_type
158
+ yield reasoning_message
159
+
160
+ # Additionally inner thoughts may stream back with a chunk of main JSON
161
+ # In that case, since we can only return a chunk at a time, we should buffer it
162
+ if updates_main_json:
163
+ if self.function_args_buffer is None:
164
+ self.function_args_buffer = updates_main_json
165
+ else:
166
+ self.function_args_buffer += updates_main_json
164
167
 
165
- # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
166
- if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
168
+ # If we have main_json, we should output a ToolCallMessage
169
+ elif updates_main_json:
167
170
 
168
- # Store the ID of the tool call so allow skipping the corresponding response
169
- if self.function_id_buffer:
170
- self.prev_assistant_message_id = self.function_id_buffer
171
+ # If there's something in the function_name buffer, we should release it first
172
+ # NOTE: we could output it as part of a chunk that has both name and args,
173
+ # however the frontend may expect name first, then args, so to be
174
+ # safe we'll output name first in a separate chunk
175
+ if self.function_name_buffer:
171
176
 
172
- else:
173
- if prev_message_type and prev_message_type != "tool_call_message":
174
- message_index += 1
175
- self.tool_call_name = str(self.function_name_buffer)
176
- tool_call_msg = ToolCallMessage(
177
- id=self.letta_message_id,
178
- date=datetime.now(timezone.utc),
179
- tool_call=ToolCallDelta(
180
- name=self.function_name_buffer,
181
- arguments=None,
182
- tool_call_id=self.function_id_buffer,
183
- ),
184
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
185
- )
186
- prev_message_type = tool_call_msg.message_type
187
- yield tool_call_msg
188
-
189
- # Record what the last function name we flushed was
190
- self.last_flushed_function_name = self.function_name_buffer
191
- if self.last_flushed_function_id is None:
192
- self.last_flushed_function_id = self.function_id_buffer
193
- # Clear the buffer
194
- self.function_name_buffer = None
195
- self.function_id_buffer = None
196
- # Since we're clearing the name buffer, we should store
197
- # any updates to the arguments inside a separate buffer
198
-
199
- # Add any main_json updates to the arguments buffer
200
- if self.function_args_buffer is None:
201
- self.function_args_buffer = updates_main_json
202
- else:
203
- self.function_args_buffer += updates_main_json
177
+ # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
178
+ if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
204
179
 
205
- # If there was nothing in the name buffer, we can proceed to
206
- # output the arguments chunk as a ToolCallMessage
207
- else:
208
-
209
- # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
210
- if self.use_assistant_message and (
211
- self.last_flushed_function_name is not None
212
- and self.last_flushed_function_name == self.assistant_message_tool_name
213
- ):
214
- # do an additional parse on the updates_main_json
215
- if self.function_args_buffer:
216
- updates_main_json = self.function_args_buffer + updates_main_json
217
- self.function_args_buffer = None
218
-
219
- # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
220
- match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
221
- if updates_main_json == match_str:
222
- updates_main_json = None
223
-
224
- else:
225
- # Some hardcoding to strip off the trailing "}"
226
- if updates_main_json in ["}", '"}']:
227
- updates_main_json = None
228
- if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
229
- updates_main_json = updates_main_json[:-1]
230
-
231
- if not updates_main_json:
232
- # early exit to turn into content mode
233
- continue
234
-
235
- # There may be a buffer from a previous chunk, for example
236
- # if the previous chunk had arguments but we needed to flush name
237
- if self.function_args_buffer:
238
- # In this case, we should release the buffer + new data at once
239
- combined_chunk = self.function_args_buffer + updates_main_json
240
-
241
- if prev_message_type and prev_message_type != "assistant_message":
242
- message_index += 1
243
- assistant_message = AssistantMessage(
244
- id=self.letta_message_id,
245
- date=datetime.now(timezone.utc),
246
- content=combined_chunk,
247
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
248
- )
249
- prev_message_type = assistant_message.message_type
250
- yield assistant_message
251
180
  # Store the ID of the tool call so allow skipping the corresponding response
252
181
  if self.function_id_buffer:
253
182
  self.prev_assistant_message_id = self.function_id_buffer
254
- # clear buffer
255
- self.function_args_buffer = None
256
- self.function_id_buffer = None
257
183
 
258
184
  else:
259
- # If there's no buffer to clear, just output a new chunk with new data
260
- # TODO: THIS IS HORRIBLE
261
- # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
262
- # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
263
- parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
264
-
265
- if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
266
- self.assistant_message_tool_kwarg
267
- ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
268
- new_content = parsed_args.get(self.assistant_message_tool_kwarg)
269
- prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
270
- # TODO: Assumes consistent state and that prev_content is subset of new_content
271
- diff = new_content.replace(prev_content, "", 1)
272
- self.current_json_parse_result = parsed_args
273
- if prev_message_type and prev_message_type != "assistant_message":
274
- message_index += 1
275
- assistant_message = AssistantMessage(
276
- id=self.letta_message_id,
277
- date=datetime.now(timezone.utc),
278
- content=diff,
279
- # name=name,
280
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
281
- )
282
- prev_message_type = assistant_message.message_type
283
- yield assistant_message
284
-
285
- # Store the ID of the tool call so allow skipping the corresponding response
286
- if self.function_id_buffer:
287
- self.prev_assistant_message_id = self.function_id_buffer
288
- # clear buffers
289
- self.function_id_buffer = None
290
- else:
291
-
292
- # There may be a buffer from a previous chunk, for example
293
- # if the previous chunk had arguments but we needed to flush name
294
- if self.function_args_buffer:
295
- # In this case, we should release the buffer + new data at once
296
- combined_chunk = self.function_args_buffer + updates_main_json
297
185
  if prev_message_type and prev_message_type != "tool_call_message":
298
186
  message_index += 1
187
+ self.tool_call_name = str(self.function_name_buffer)
299
188
  tool_call_msg = ToolCallMessage(
300
189
  id=self.letta_message_id,
301
190
  date=datetime.now(timezone.utc),
302
191
  tool_call=ToolCallDelta(
303
192
  name=self.function_name_buffer,
304
- arguments=combined_chunk,
193
+ arguments=None,
305
194
  tool_call_id=self.function_id_buffer,
306
195
  ),
307
- # name=name,
308
196
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
309
197
  )
310
198
  prev_message_type = tool_call_msg.message_type
311
199
  yield tool_call_msg
312
- # clear buffer
313
- self.function_args_buffer = None
314
- self.function_id_buffer = None
200
+
201
+ # Record what the last function name we flushed was
202
+ self.last_flushed_function_name = self.function_name_buffer
203
+ if self.last_flushed_function_id is None:
204
+ self.last_flushed_function_id = self.function_id_buffer
205
+ # Clear the buffer
206
+ self.function_name_buffer = None
207
+ self.function_id_buffer = None
208
+ # Since we're clearing the name buffer, we should store
209
+ # any updates to the arguments inside a separate buffer
210
+
211
+ # Add any main_json updates to the arguments buffer
212
+ if self.function_args_buffer is None:
213
+ self.function_args_buffer = updates_main_json
315
214
  else:
316
- # If there's no buffer to clear, just output a new chunk with new data
317
- if prev_message_type and prev_message_type != "tool_call_message":
318
- message_index += 1
319
- tool_call_msg = ToolCallMessage(
320
- id=self.letta_message_id,
321
- date=datetime.now(timezone.utc),
322
- tool_call=ToolCallDelta(
323
- name=None,
324
- arguments=updates_main_json,
325
- tool_call_id=self.function_id_buffer,
326
- ),
327
- # name=name,
328
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
329
- )
330
- prev_message_type = tool_call_msg.message_type
331
- yield tool_call_msg
332
- self.function_id_buffer = None
215
+ self.function_args_buffer += updates_main_json
216
+
217
+ # If there was nothing in the name buffer, we can proceed to
218
+ # output the arguments chunk as a ToolCallMessage
219
+ else:
220
+
221
+ # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
222
+ if self.use_assistant_message and (
223
+ self.last_flushed_function_name is not None
224
+ and self.last_flushed_function_name == self.assistant_message_tool_name
225
+ ):
226
+ # do an additional parse on the updates_main_json
227
+ if self.function_args_buffer:
228
+ updates_main_json = self.function_args_buffer + updates_main_json
229
+ self.function_args_buffer = None
230
+
231
+ # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
232
+ match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
233
+ if updates_main_json == match_str:
234
+ updates_main_json = None
235
+
236
+ else:
237
+ # Some hardcoding to strip off the trailing "}"
238
+ if updates_main_json in ["}", '"}']:
239
+ updates_main_json = None
240
+ if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
241
+ updates_main_json = updates_main_json[:-1]
242
+
243
+ if not updates_main_json:
244
+ # early exit to turn into content mode
245
+ continue
246
+
247
+ # There may be a buffer from a previous chunk, for example
248
+ # if the previous chunk had arguments but we needed to flush name
249
+ if self.function_args_buffer:
250
+ # In this case, we should release the buffer + new data at once
251
+ combined_chunk = self.function_args_buffer + updates_main_json
252
+
253
+ if prev_message_type and prev_message_type != "assistant_message":
254
+ message_index += 1
255
+ assistant_message = AssistantMessage(
256
+ id=self.letta_message_id,
257
+ date=datetime.now(timezone.utc),
258
+ content=combined_chunk,
259
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
260
+ )
261
+ prev_message_type = assistant_message.message_type
262
+ yield assistant_message
263
+ # Store the ID of the tool call so allow skipping the corresponding response
264
+ if self.function_id_buffer:
265
+ self.prev_assistant_message_id = self.function_id_buffer
266
+ # clear buffer
267
+ self.function_args_buffer = None
268
+ self.function_id_buffer = None
269
+
270
+ else:
271
+ # If there's no buffer to clear, just output a new chunk with new data
272
+ # TODO: THIS IS HORRIBLE
273
+ # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
274
+ # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
275
+ parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
276
+
277
+ if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
278
+ self.assistant_message_tool_kwarg
279
+ ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
280
+ new_content = parsed_args.get(self.assistant_message_tool_kwarg)
281
+ prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
282
+ # TODO: Assumes consistent state and that prev_content is subset of new_content
283
+ diff = new_content.replace(prev_content, "", 1)
284
+ self.current_json_parse_result = parsed_args
285
+ if prev_message_type and prev_message_type != "assistant_message":
286
+ message_index += 1
287
+ assistant_message = AssistantMessage(
288
+ id=self.letta_message_id,
289
+ date=datetime.now(timezone.utc),
290
+ content=diff,
291
+ # name=name,
292
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
293
+ )
294
+ prev_message_type = assistant_message.message_type
295
+ yield assistant_message
296
+
297
+ # Store the ID of the tool call so allow skipping the corresponding response
298
+ if self.function_id_buffer:
299
+ self.prev_assistant_message_id = self.function_id_buffer
300
+ # clear buffers
301
+ self.function_id_buffer = None
302
+ else:
303
+
304
+ # There may be a buffer from a previous chunk, for example
305
+ # if the previous chunk had arguments but we needed to flush name
306
+ if self.function_args_buffer:
307
+ # In this case, we should release the buffer + new data at once
308
+ combined_chunk = self.function_args_buffer + updates_main_json
309
+ if prev_message_type and prev_message_type != "tool_call_message":
310
+ message_index += 1
311
+ tool_call_msg = ToolCallMessage(
312
+ id=self.letta_message_id,
313
+ date=datetime.now(timezone.utc),
314
+ tool_call=ToolCallDelta(
315
+ name=self.function_name_buffer,
316
+ arguments=combined_chunk,
317
+ tool_call_id=self.function_id_buffer,
318
+ ),
319
+ # name=name,
320
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
321
+ )
322
+ prev_message_type = tool_call_msg.message_type
323
+ yield tool_call_msg
324
+ # clear buffer
325
+ self.function_args_buffer = None
326
+ self.function_id_buffer = None
327
+ else:
328
+ # If there's no buffer to clear, just output a new chunk with new data
329
+ if prev_message_type and prev_message_type != "tool_call_message":
330
+ message_index += 1
331
+ tool_call_msg = ToolCallMessage(
332
+ id=self.letta_message_id,
333
+ date=datetime.now(timezone.utc),
334
+ tool_call=ToolCallDelta(
335
+ name=None,
336
+ arguments=updates_main_json,
337
+ tool_call_id=self.function_id_buffer,
338
+ ),
339
+ # name=name,
340
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
341
+ )
342
+ prev_message_type = tool_call_msg.message_type
343
+ yield tool_call_msg
344
+ self.function_id_buffer = None
345
+ except Exception as e:
346
+ logger.error("Error processing stream: %s", e)
347
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
348
+ yield stop_reason
349
+ raise
350
+ finally:
351
+ logger.info("OpenAIStreamingInterface: Stream processing complete.")
@@ -286,7 +286,7 @@ class OpenAIClient(LLMClientBase):
286
286
 
287
287
  # If we used a reasoning model, create a content part for the ommitted reasoning
288
288
  if is_openai_reasoning_model(llm_config.model):
289
- chat_completion_response.choices[0].message.ommitted_reasoning_content = True
289
+ chat_completion_response.choices[0].message.omitted_reasoning_content = True
290
290
 
291
291
  return chat_completion_response
292
292
 
letta/local_llm/utils.py CHANGED
@@ -100,7 +100,11 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
100
100
  try:
101
101
  if field == "type":
102
102
  function_tokens += 2
103
- function_tokens += len(encoding.encode(v["type"]))
103
+ # Handle both string and array types, e.g. {"type": ["string", "null"]}
104
+ if isinstance(v["type"], list):
105
+ function_tokens += len(encoding.encode(",".join(v["type"])))
106
+ else:
107
+ function_tokens += len(encoding.encode(v["type"]))
104
108
  elif field == "description":
105
109
  function_tokens += 2
106
110
  function_tokens += len(encoding.encode(v["description"]))
letta/orm/enums.py CHANGED
@@ -38,3 +38,4 @@ class ActorType(str, Enum):
38
38
  class MCPServerType(str, Enum):
39
39
  SSE = "sse"
40
40
  STDIO = "stdio"
41
+ STREAMABLE_HTTP = "streamable_http"
letta/orm/mcp_server.py CHANGED
@@ -36,6 +36,9 @@ class MCPServer(SqlalchemyBase, OrganizationMixin):
36
36
  String, nullable=True, doc="The URL of the server (MCP SSE client will connect to this URL)"
37
37
  )
38
38
 
39
+ # access token / api key for MCP servers that require authentication
40
+ token: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The access token or api key for the MCP server")
41
+
39
42
  # stdio server
40
43
  stdio_config: Mapped[Optional[StdioServerConfig]] = mapped_column(
41
44
  MCPStdioServerConfigColumn, nullable=True, doc="The configuration for the stdio server"
letta/orm/tool.py CHANGED
@@ -44,6 +44,9 @@ class Tool(SqlalchemyBase, OrganizationMixin):
44
44
  source_code: Mapped[Optional[str]] = mapped_column(String, doc="The source code of the function.")
45
45
  json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The OAI compatable JSON schema of the function.")
46
46
  args_json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The JSON schema of the function arguments.")
47
+ pip_requirements: Mapped[Optional[List]] = mapped_column(
48
+ JSON, nullable=True, doc="Optional list of pip packages required by this tool."
49
+ )
47
50
  metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="A dictionary of additional metadata for the tool.")
48
51
  # relationships
49
52
  organization: Mapped["Organization"] = relationship("Organization", back_populates="tools", lazy="selectin")
@@ -95,6 +95,18 @@ class MetricRegistry:
95
95
  ),
96
96
  )
97
97
 
98
+ @property
99
+ def step_execution_time_ms_histogram(self) -> Histogram:
100
+ return self._get_or_create_metric(
101
+ "hist_step_execution_time_ms",
102
+ partial(
103
+ self._meter.create_histogram,
104
+ name="hist_step_execution_time_ms",
105
+ description="Histogram for step execution time (ms)",
106
+ unit="ms",
107
+ ),
108
+ )
109
+
98
110
  # TODO (cliandy): instrument this
99
111
  @property
100
112
  def message_cost(self) -> Histogram: