letta-nightly 0.8.4.dev20250614104137__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -0
- letta/agents/base_agent.py +12 -1
- letta/agents/helpers.py +5 -2
- letta/agents/letta_agent.py +98 -61
- letta/agents/voice_sleeptime_agent.py +2 -1
- letta/constants.py +3 -5
- letta/data_sources/redis_client.py +30 -10
- letta/functions/function_sets/files.py +4 -4
- letta/functions/helpers.py +6 -1
- letta/functions/mcp_client/types.py +95 -0
- letta/groups/sleeptime_multi_agent_v2.py +2 -1
- letta/helpers/decorators.py +91 -0
- letta/interfaces/anthropic_streaming_interface.py +11 -0
- letta/interfaces/openai_streaming_interface.py +244 -225
- letta/llm_api/openai_client.py +1 -1
- letta/local_llm/utils.py +5 -1
- letta/orm/enums.py +1 -0
- letta/orm/mcp_server.py +3 -0
- letta/orm/tool.py +3 -0
- letta/otel/metric_registry.py +12 -0
- letta/otel/metrics.py +16 -7
- letta/schemas/letta_response.py +6 -1
- letta/schemas/letta_stop_reason.py +22 -0
- letta/schemas/mcp.py +48 -6
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/openai/chat_completion_response.py +1 -1
- letta/schemas/pip_requirement.py +14 -0
- letta/schemas/sandbox_config.py +1 -19
- letta/schemas/tool.py +5 -0
- letta/server/rest_api/json_parser.py +39 -3
- letta/server/rest_api/routers/v1/tools.py +3 -1
- letta/server/rest_api/routers/v1/voice.py +2 -3
- letta/server/rest_api/utils.py +1 -1
- letta/server/server.py +11 -2
- letta/services/agent_manager.py +37 -29
- letta/services/helpers/tool_execution_helper.py +39 -9
- letta/services/mcp/base_client.py +13 -2
- letta/services/mcp/sse_client.py +8 -1
- letta/services/mcp/streamable_http_client.py +56 -0
- letta/services/mcp_manager.py +23 -9
- letta/services/message_manager.py +30 -3
- letta/services/tool_executor/files_tool_executor.py +2 -3
- letta/services/tool_sandbox/e2b_sandbox.py +53 -3
- letta/services/tool_sandbox/local_sandbox.py +3 -1
- letta/services/user_manager.py +22 -0
- letta/settings.py +3 -0
- {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
- {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
- {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
@@ -6,13 +6,19 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|
6
6
|
|
7
7
|
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
8
8
|
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
|
9
|
+
from letta.log import get_logger
|
10
|
+
from letta.otel.context import get_ctx_attributes
|
11
|
+
from letta.otel.metric_registry import MetricRegistry
|
9
12
|
from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
|
10
13
|
from letta.schemas.letta_message_content import TextContent
|
14
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
11
15
|
from letta.schemas.message import Message
|
12
16
|
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
13
17
|
from letta.server.rest_api.json_parser import OptimisticJSONParser
|
14
18
|
from letta.streaming_utils import JSONInnerThoughtsExtractor
|
15
19
|
|
20
|
+
logger = get_logger(__name__)
|
21
|
+
|
16
22
|
|
17
23
|
class OpenAIStreamingInterface:
|
18
24
|
"""
|
@@ -60,6 +66,8 @@ class OpenAIStreamingInterface:
|
|
60
66
|
def get_tool_call_object(self) -> ToolCall:
|
61
67
|
"""Useful for agent loop"""
|
62
68
|
function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
|
69
|
+
if not function_name:
|
70
|
+
raise ValueError("No tool call ID available")
|
63
71
|
tool_call_id = self.last_flushed_function_id if self.last_flushed_function_id else self.function_id_buffer
|
64
72
|
if not tool_call_id:
|
65
73
|
raise ValueError("No tool call ID available")
|
@@ -79,254 +87,265 @@ class OpenAIStreamingInterface:
|
|
79
87
|
It also collects tokens and detects if a tool call is triggered.
|
80
88
|
"""
|
81
89
|
first_chunk = True
|
90
|
+
try:
|
91
|
+
async with stream:
|
92
|
+
prev_message_type = None
|
93
|
+
message_index = 0
|
94
|
+
async for chunk in stream:
|
95
|
+
if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
|
96
|
+
now = get_utc_timestamp_ns()
|
97
|
+
ttft_ns = now - provider_request_start_timestamp_ns
|
98
|
+
ttft_span.add_event(
|
99
|
+
name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
|
100
|
+
)
|
101
|
+
metric_attributes = get_ctx_attributes()
|
102
|
+
metric_attributes["model.name"] = chunk.model
|
103
|
+
MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
|
104
|
+
|
105
|
+
first_chunk = False
|
106
|
+
|
107
|
+
if not self.model or not self.message_id:
|
108
|
+
self.model = chunk.model
|
109
|
+
self.message_id = chunk.id
|
110
|
+
|
111
|
+
# track usage
|
112
|
+
if chunk.usage:
|
113
|
+
self.input_tokens += chunk.usage.prompt_tokens
|
114
|
+
self.output_tokens += chunk.usage.completion_tokens
|
115
|
+
|
116
|
+
if chunk.choices:
|
117
|
+
choice = chunk.choices[0]
|
118
|
+
message_delta = choice.delta
|
119
|
+
|
120
|
+
if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
|
121
|
+
tool_call = message_delta.tool_calls[0]
|
122
|
+
|
123
|
+
if tool_call.function.name:
|
124
|
+
# If we're waiting for the first key, then we should hold back the name
|
125
|
+
# ie add it to a buffer instead of returning it as a chunk
|
126
|
+
if self.function_name_buffer is None:
|
127
|
+
self.function_name_buffer = tool_call.function.name
|
128
|
+
else:
|
129
|
+
self.function_name_buffer += tool_call.function.name
|
82
130
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
ttft_ns = now - provider_request_start_timestamp_ns
|
90
|
-
ttft_span.add_event(
|
91
|
-
name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
|
92
|
-
)
|
93
|
-
first_chunk = False
|
94
|
-
|
95
|
-
if not self.model or not self.message_id:
|
96
|
-
self.model = chunk.model
|
97
|
-
self.message_id = chunk.id
|
98
|
-
|
99
|
-
# track usage
|
100
|
-
if chunk.usage:
|
101
|
-
self.input_tokens += chunk.usage.prompt_tokens
|
102
|
-
self.output_tokens += chunk.usage.completion_tokens
|
103
|
-
|
104
|
-
if chunk.choices:
|
105
|
-
choice = chunk.choices[0]
|
106
|
-
message_delta = choice.delta
|
107
|
-
|
108
|
-
if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
|
109
|
-
tool_call = message_delta.tool_calls[0]
|
110
|
-
|
111
|
-
if tool_call.function.name:
|
112
|
-
# If we're waiting for the first key, then we should hold back the name
|
113
|
-
# ie add it to a buffer instead of returning it as a chunk
|
114
|
-
if self.function_name_buffer is None:
|
115
|
-
self.function_name_buffer = tool_call.function.name
|
116
|
-
else:
|
117
|
-
self.function_name_buffer += tool_call.function.name
|
118
|
-
|
119
|
-
if tool_call.id:
|
120
|
-
# Buffer until next time
|
121
|
-
if self.function_id_buffer is None:
|
122
|
-
self.function_id_buffer = tool_call.id
|
123
|
-
else:
|
124
|
-
self.function_id_buffer += tool_call.id
|
125
|
-
|
126
|
-
if tool_call.function.arguments:
|
127
|
-
# updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
|
128
|
-
self.current_function_arguments += tool_call.function.arguments
|
129
|
-
updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
|
130
|
-
tool_call.function.arguments
|
131
|
-
)
|
132
|
-
|
133
|
-
# If we have inner thoughts, we should output them as a chunk
|
134
|
-
if updates_inner_thoughts:
|
135
|
-
if prev_message_type and prev_message_type != "reasoning_message":
|
136
|
-
message_index += 1
|
137
|
-
self.reasoning_messages.append(updates_inner_thoughts)
|
138
|
-
reasoning_message = ReasoningMessage(
|
139
|
-
id=self.letta_message_id,
|
140
|
-
date=datetime.now(timezone.utc),
|
141
|
-
reasoning=updates_inner_thoughts,
|
142
|
-
# name=name,
|
143
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
144
|
-
)
|
145
|
-
prev_message_type = reasoning_message.message_type
|
146
|
-
yield reasoning_message
|
147
|
-
|
148
|
-
# Additionally inner thoughts may stream back with a chunk of main JSON
|
149
|
-
# In that case, since we can only return a chunk at a time, we should buffer it
|
150
|
-
if updates_main_json:
|
151
|
-
if self.function_args_buffer is None:
|
152
|
-
self.function_args_buffer = updates_main_json
|
153
|
-
else:
|
154
|
-
self.function_args_buffer += updates_main_json
|
131
|
+
if tool_call.id:
|
132
|
+
# Buffer until next time
|
133
|
+
if self.function_id_buffer is None:
|
134
|
+
self.function_id_buffer = tool_call.id
|
135
|
+
else:
|
136
|
+
self.function_id_buffer += tool_call.id
|
155
137
|
|
156
|
-
|
157
|
-
|
138
|
+
if tool_call.function.arguments:
|
139
|
+
# updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
|
140
|
+
self.current_function_arguments += tool_call.function.arguments
|
141
|
+
updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
|
142
|
+
tool_call.function.arguments
|
143
|
+
)
|
158
144
|
|
159
|
-
# If
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
145
|
+
# If we have inner thoughts, we should output them as a chunk
|
146
|
+
if updates_inner_thoughts:
|
147
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
148
|
+
message_index += 1
|
149
|
+
self.reasoning_messages.append(updates_inner_thoughts)
|
150
|
+
reasoning_message = ReasoningMessage(
|
151
|
+
id=self.letta_message_id,
|
152
|
+
date=datetime.now(timezone.utc),
|
153
|
+
reasoning=updates_inner_thoughts,
|
154
|
+
# name=name,
|
155
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
156
|
+
)
|
157
|
+
prev_message_type = reasoning_message.message_type
|
158
|
+
yield reasoning_message
|
159
|
+
|
160
|
+
# Additionally inner thoughts may stream back with a chunk of main JSON
|
161
|
+
# In that case, since we can only return a chunk at a time, we should buffer it
|
162
|
+
if updates_main_json:
|
163
|
+
if self.function_args_buffer is None:
|
164
|
+
self.function_args_buffer = updates_main_json
|
165
|
+
else:
|
166
|
+
self.function_args_buffer += updates_main_json
|
164
167
|
|
165
|
-
|
166
|
-
|
168
|
+
# If we have main_json, we should output a ToolCallMessage
|
169
|
+
elif updates_main_json:
|
167
170
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
+
# If there's something in the function_name buffer, we should release it first
|
172
|
+
# NOTE: we could output it as part of a chunk that has both name and args,
|
173
|
+
# however the frontend may expect name first, then args, so to be
|
174
|
+
# safe we'll output name first in a separate chunk
|
175
|
+
if self.function_name_buffer:
|
171
176
|
|
172
|
-
|
173
|
-
if
|
174
|
-
message_index += 1
|
175
|
-
self.tool_call_name = str(self.function_name_buffer)
|
176
|
-
tool_call_msg = ToolCallMessage(
|
177
|
-
id=self.letta_message_id,
|
178
|
-
date=datetime.now(timezone.utc),
|
179
|
-
tool_call=ToolCallDelta(
|
180
|
-
name=self.function_name_buffer,
|
181
|
-
arguments=None,
|
182
|
-
tool_call_id=self.function_id_buffer,
|
183
|
-
),
|
184
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
185
|
-
)
|
186
|
-
prev_message_type = tool_call_msg.message_type
|
187
|
-
yield tool_call_msg
|
188
|
-
|
189
|
-
# Record what the last function name we flushed was
|
190
|
-
self.last_flushed_function_name = self.function_name_buffer
|
191
|
-
if self.last_flushed_function_id is None:
|
192
|
-
self.last_flushed_function_id = self.function_id_buffer
|
193
|
-
# Clear the buffer
|
194
|
-
self.function_name_buffer = None
|
195
|
-
self.function_id_buffer = None
|
196
|
-
# Since we're clearing the name buffer, we should store
|
197
|
-
# any updates to the arguments inside a separate buffer
|
198
|
-
|
199
|
-
# Add any main_json updates to the arguments buffer
|
200
|
-
if self.function_args_buffer is None:
|
201
|
-
self.function_args_buffer = updates_main_json
|
202
|
-
else:
|
203
|
-
self.function_args_buffer += updates_main_json
|
177
|
+
# use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
|
178
|
+
if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
|
204
179
|
|
205
|
-
# If there was nothing in the name buffer, we can proceed to
|
206
|
-
# output the arguments chunk as a ToolCallMessage
|
207
|
-
else:
|
208
|
-
|
209
|
-
# use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
|
210
|
-
if self.use_assistant_message and (
|
211
|
-
self.last_flushed_function_name is not None
|
212
|
-
and self.last_flushed_function_name == self.assistant_message_tool_name
|
213
|
-
):
|
214
|
-
# do an additional parse on the updates_main_json
|
215
|
-
if self.function_args_buffer:
|
216
|
-
updates_main_json = self.function_args_buffer + updates_main_json
|
217
|
-
self.function_args_buffer = None
|
218
|
-
|
219
|
-
# Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
|
220
|
-
match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
|
221
|
-
if updates_main_json == match_str:
|
222
|
-
updates_main_json = None
|
223
|
-
|
224
|
-
else:
|
225
|
-
# Some hardcoding to strip off the trailing "}"
|
226
|
-
if updates_main_json in ["}", '"}']:
|
227
|
-
updates_main_json = None
|
228
|
-
if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
|
229
|
-
updates_main_json = updates_main_json[:-1]
|
230
|
-
|
231
|
-
if not updates_main_json:
|
232
|
-
# early exit to turn into content mode
|
233
|
-
continue
|
234
|
-
|
235
|
-
# There may be a buffer from a previous chunk, for example
|
236
|
-
# if the previous chunk had arguments but we needed to flush name
|
237
|
-
if self.function_args_buffer:
|
238
|
-
# In this case, we should release the buffer + new data at once
|
239
|
-
combined_chunk = self.function_args_buffer + updates_main_json
|
240
|
-
|
241
|
-
if prev_message_type and prev_message_type != "assistant_message":
|
242
|
-
message_index += 1
|
243
|
-
assistant_message = AssistantMessage(
|
244
|
-
id=self.letta_message_id,
|
245
|
-
date=datetime.now(timezone.utc),
|
246
|
-
content=combined_chunk,
|
247
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
248
|
-
)
|
249
|
-
prev_message_type = assistant_message.message_type
|
250
|
-
yield assistant_message
|
251
180
|
# Store the ID of the tool call so allow skipping the corresponding response
|
252
181
|
if self.function_id_buffer:
|
253
182
|
self.prev_assistant_message_id = self.function_id_buffer
|
254
|
-
# clear buffer
|
255
|
-
self.function_args_buffer = None
|
256
|
-
self.function_id_buffer = None
|
257
183
|
|
258
184
|
else:
|
259
|
-
# If there's no buffer to clear, just output a new chunk with new data
|
260
|
-
# TODO: THIS IS HORRIBLE
|
261
|
-
# TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
|
262
|
-
# TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
|
263
|
-
parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
|
264
|
-
|
265
|
-
if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
|
266
|
-
self.assistant_message_tool_kwarg
|
267
|
-
) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
|
268
|
-
new_content = parsed_args.get(self.assistant_message_tool_kwarg)
|
269
|
-
prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
|
270
|
-
# TODO: Assumes consistent state and that prev_content is subset of new_content
|
271
|
-
diff = new_content.replace(prev_content, "", 1)
|
272
|
-
self.current_json_parse_result = parsed_args
|
273
|
-
if prev_message_type and prev_message_type != "assistant_message":
|
274
|
-
message_index += 1
|
275
|
-
assistant_message = AssistantMessage(
|
276
|
-
id=self.letta_message_id,
|
277
|
-
date=datetime.now(timezone.utc),
|
278
|
-
content=diff,
|
279
|
-
# name=name,
|
280
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
281
|
-
)
|
282
|
-
prev_message_type = assistant_message.message_type
|
283
|
-
yield assistant_message
|
284
|
-
|
285
|
-
# Store the ID of the tool call so allow skipping the corresponding response
|
286
|
-
if self.function_id_buffer:
|
287
|
-
self.prev_assistant_message_id = self.function_id_buffer
|
288
|
-
# clear buffers
|
289
|
-
self.function_id_buffer = None
|
290
|
-
else:
|
291
|
-
|
292
|
-
# There may be a buffer from a previous chunk, for example
|
293
|
-
# if the previous chunk had arguments but we needed to flush name
|
294
|
-
if self.function_args_buffer:
|
295
|
-
# In this case, we should release the buffer + new data at once
|
296
|
-
combined_chunk = self.function_args_buffer + updates_main_json
|
297
185
|
if prev_message_type and prev_message_type != "tool_call_message":
|
298
186
|
message_index += 1
|
187
|
+
self.tool_call_name = str(self.function_name_buffer)
|
299
188
|
tool_call_msg = ToolCallMessage(
|
300
189
|
id=self.letta_message_id,
|
301
190
|
date=datetime.now(timezone.utc),
|
302
191
|
tool_call=ToolCallDelta(
|
303
192
|
name=self.function_name_buffer,
|
304
|
-
arguments=
|
193
|
+
arguments=None,
|
305
194
|
tool_call_id=self.function_id_buffer,
|
306
195
|
),
|
307
|
-
# name=name,
|
308
196
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
309
197
|
)
|
310
198
|
prev_message_type = tool_call_msg.message_type
|
311
199
|
yield tool_call_msg
|
312
|
-
|
313
|
-
|
314
|
-
|
200
|
+
|
201
|
+
# Record what the last function name we flushed was
|
202
|
+
self.last_flushed_function_name = self.function_name_buffer
|
203
|
+
if self.last_flushed_function_id is None:
|
204
|
+
self.last_flushed_function_id = self.function_id_buffer
|
205
|
+
# Clear the buffer
|
206
|
+
self.function_name_buffer = None
|
207
|
+
self.function_id_buffer = None
|
208
|
+
# Since we're clearing the name buffer, we should store
|
209
|
+
# any updates to the arguments inside a separate buffer
|
210
|
+
|
211
|
+
# Add any main_json updates to the arguments buffer
|
212
|
+
if self.function_args_buffer is None:
|
213
|
+
self.function_args_buffer = updates_main_json
|
315
214
|
else:
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
215
|
+
self.function_args_buffer += updates_main_json
|
216
|
+
|
217
|
+
# If there was nothing in the name buffer, we can proceed to
|
218
|
+
# output the arguments chunk as a ToolCallMessage
|
219
|
+
else:
|
220
|
+
|
221
|
+
# use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
|
222
|
+
if self.use_assistant_message and (
|
223
|
+
self.last_flushed_function_name is not None
|
224
|
+
and self.last_flushed_function_name == self.assistant_message_tool_name
|
225
|
+
):
|
226
|
+
# do an additional parse on the updates_main_json
|
227
|
+
if self.function_args_buffer:
|
228
|
+
updates_main_json = self.function_args_buffer + updates_main_json
|
229
|
+
self.function_args_buffer = None
|
230
|
+
|
231
|
+
# Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
|
232
|
+
match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
|
233
|
+
if updates_main_json == match_str:
|
234
|
+
updates_main_json = None
|
235
|
+
|
236
|
+
else:
|
237
|
+
# Some hardcoding to strip off the trailing "}"
|
238
|
+
if updates_main_json in ["}", '"}']:
|
239
|
+
updates_main_json = None
|
240
|
+
if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
|
241
|
+
updates_main_json = updates_main_json[:-1]
|
242
|
+
|
243
|
+
if not updates_main_json:
|
244
|
+
# early exit to turn into content mode
|
245
|
+
continue
|
246
|
+
|
247
|
+
# There may be a buffer from a previous chunk, for example
|
248
|
+
# if the previous chunk had arguments but we needed to flush name
|
249
|
+
if self.function_args_buffer:
|
250
|
+
# In this case, we should release the buffer + new data at once
|
251
|
+
combined_chunk = self.function_args_buffer + updates_main_json
|
252
|
+
|
253
|
+
if prev_message_type and prev_message_type != "assistant_message":
|
254
|
+
message_index += 1
|
255
|
+
assistant_message = AssistantMessage(
|
256
|
+
id=self.letta_message_id,
|
257
|
+
date=datetime.now(timezone.utc),
|
258
|
+
content=combined_chunk,
|
259
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
260
|
+
)
|
261
|
+
prev_message_type = assistant_message.message_type
|
262
|
+
yield assistant_message
|
263
|
+
# Store the ID of the tool call so allow skipping the corresponding response
|
264
|
+
if self.function_id_buffer:
|
265
|
+
self.prev_assistant_message_id = self.function_id_buffer
|
266
|
+
# clear buffer
|
267
|
+
self.function_args_buffer = None
|
268
|
+
self.function_id_buffer = None
|
269
|
+
|
270
|
+
else:
|
271
|
+
# If there's no buffer to clear, just output a new chunk with new data
|
272
|
+
# TODO: THIS IS HORRIBLE
|
273
|
+
# TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
|
274
|
+
# TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
|
275
|
+
parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
|
276
|
+
|
277
|
+
if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
|
278
|
+
self.assistant_message_tool_kwarg
|
279
|
+
) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
|
280
|
+
new_content = parsed_args.get(self.assistant_message_tool_kwarg)
|
281
|
+
prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
|
282
|
+
# TODO: Assumes consistent state and that prev_content is subset of new_content
|
283
|
+
diff = new_content.replace(prev_content, "", 1)
|
284
|
+
self.current_json_parse_result = parsed_args
|
285
|
+
if prev_message_type and prev_message_type != "assistant_message":
|
286
|
+
message_index += 1
|
287
|
+
assistant_message = AssistantMessage(
|
288
|
+
id=self.letta_message_id,
|
289
|
+
date=datetime.now(timezone.utc),
|
290
|
+
content=diff,
|
291
|
+
# name=name,
|
292
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
293
|
+
)
|
294
|
+
prev_message_type = assistant_message.message_type
|
295
|
+
yield assistant_message
|
296
|
+
|
297
|
+
# Store the ID of the tool call so allow skipping the corresponding response
|
298
|
+
if self.function_id_buffer:
|
299
|
+
self.prev_assistant_message_id = self.function_id_buffer
|
300
|
+
# clear buffers
|
301
|
+
self.function_id_buffer = None
|
302
|
+
else:
|
303
|
+
|
304
|
+
# There may be a buffer from a previous chunk, for example
|
305
|
+
# if the previous chunk had arguments but we needed to flush name
|
306
|
+
if self.function_args_buffer:
|
307
|
+
# In this case, we should release the buffer + new data at once
|
308
|
+
combined_chunk = self.function_args_buffer + updates_main_json
|
309
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
310
|
+
message_index += 1
|
311
|
+
tool_call_msg = ToolCallMessage(
|
312
|
+
id=self.letta_message_id,
|
313
|
+
date=datetime.now(timezone.utc),
|
314
|
+
tool_call=ToolCallDelta(
|
315
|
+
name=self.function_name_buffer,
|
316
|
+
arguments=combined_chunk,
|
317
|
+
tool_call_id=self.function_id_buffer,
|
318
|
+
),
|
319
|
+
# name=name,
|
320
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
321
|
+
)
|
322
|
+
prev_message_type = tool_call_msg.message_type
|
323
|
+
yield tool_call_msg
|
324
|
+
# clear buffer
|
325
|
+
self.function_args_buffer = None
|
326
|
+
self.function_id_buffer = None
|
327
|
+
else:
|
328
|
+
# If there's no buffer to clear, just output a new chunk with new data
|
329
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
330
|
+
message_index += 1
|
331
|
+
tool_call_msg = ToolCallMessage(
|
332
|
+
id=self.letta_message_id,
|
333
|
+
date=datetime.now(timezone.utc),
|
334
|
+
tool_call=ToolCallDelta(
|
335
|
+
name=None,
|
336
|
+
arguments=updates_main_json,
|
337
|
+
tool_call_id=self.function_id_buffer,
|
338
|
+
),
|
339
|
+
# name=name,
|
340
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
341
|
+
)
|
342
|
+
prev_message_type = tool_call_msg.message_type
|
343
|
+
yield tool_call_msg
|
344
|
+
self.function_id_buffer = None
|
345
|
+
except Exception as e:
|
346
|
+
logger.error("Error processing stream: %s", e)
|
347
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
348
|
+
yield stop_reason
|
349
|
+
raise
|
350
|
+
finally:
|
351
|
+
logger.info("OpenAIStreamingInterface: Stream processing complete.")
|
letta/llm_api/openai_client.py
CHANGED
@@ -286,7 +286,7 @@ class OpenAIClient(LLMClientBase):
|
|
286
286
|
|
287
287
|
# If we used a reasoning model, create a content part for the ommitted reasoning
|
288
288
|
if is_openai_reasoning_model(llm_config.model):
|
289
|
-
chat_completion_response.choices[0].message.
|
289
|
+
chat_completion_response.choices[0].message.omitted_reasoning_content = True
|
290
290
|
|
291
291
|
return chat_completion_response
|
292
292
|
|
letta/local_llm/utils.py
CHANGED
@@ -100,7 +100,11 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
|
|
100
100
|
try:
|
101
101
|
if field == "type":
|
102
102
|
function_tokens += 2
|
103
|
-
|
103
|
+
# Handle both string and array types, e.g. {"type": ["string", "null"]}
|
104
|
+
if isinstance(v["type"], list):
|
105
|
+
function_tokens += len(encoding.encode(",".join(v["type"])))
|
106
|
+
else:
|
107
|
+
function_tokens += len(encoding.encode(v["type"]))
|
104
108
|
elif field == "description":
|
105
109
|
function_tokens += 2
|
106
110
|
function_tokens += len(encoding.encode(v["description"]))
|
letta/orm/enums.py
CHANGED
letta/orm/mcp_server.py
CHANGED
@@ -36,6 +36,9 @@ class MCPServer(SqlalchemyBase, OrganizationMixin):
|
|
36
36
|
String, nullable=True, doc="The URL of the server (MCP SSE client will connect to this URL)"
|
37
37
|
)
|
38
38
|
|
39
|
+
# access token / api key for MCP servers that require authentication
|
40
|
+
token: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The access token or api key for the MCP server")
|
41
|
+
|
39
42
|
# stdio server
|
40
43
|
stdio_config: Mapped[Optional[StdioServerConfig]] = mapped_column(
|
41
44
|
MCPStdioServerConfigColumn, nullable=True, doc="The configuration for the stdio server"
|
letta/orm/tool.py
CHANGED
@@ -44,6 +44,9 @@ class Tool(SqlalchemyBase, OrganizationMixin):
|
|
44
44
|
source_code: Mapped[Optional[str]] = mapped_column(String, doc="The source code of the function.")
|
45
45
|
json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The OAI compatable JSON schema of the function.")
|
46
46
|
args_json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The JSON schema of the function arguments.")
|
47
|
+
pip_requirements: Mapped[Optional[List]] = mapped_column(
|
48
|
+
JSON, nullable=True, doc="Optional list of pip packages required by this tool."
|
49
|
+
)
|
47
50
|
metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="A dictionary of additional metadata for the tool.")
|
48
51
|
# relationships
|
49
52
|
organization: Mapped["Organization"] = relationship("Organization", back_populates="tools", lazy="selectin")
|
letta/otel/metric_registry.py
CHANGED
@@ -95,6 +95,18 @@ class MetricRegistry:
|
|
95
95
|
),
|
96
96
|
)
|
97
97
|
|
98
|
+
@property
|
99
|
+
def step_execution_time_ms_histogram(self) -> Histogram:
|
100
|
+
return self._get_or_create_metric(
|
101
|
+
"hist_step_execution_time_ms",
|
102
|
+
partial(
|
103
|
+
self._meter.create_histogram,
|
104
|
+
name="hist_step_execution_time_ms",
|
105
|
+
description="Histogram for step execution time (ms)",
|
106
|
+
unit="ms",
|
107
|
+
),
|
108
|
+
)
|
109
|
+
|
98
110
|
# TODO (cliandy): instrument this
|
99
111
|
@property
|
100
112
|
def message_cost(self) -> Histogram:
|