letta-nightly 0.11.0.dev20250807104511__py3-none-any.whl → 0.11.0.dev20250808104456__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +2 -1
- letta/agents/letta_agent.py +215 -143
- letta/constants.py +4 -1
- letta/embeddings.py +6 -5
- letta/functions/function_sets/base.py +2 -2
- letta/functions/function_sets/files.py +22 -9
- letta/interfaces/anthropic_streaming_interface.py +291 -265
- letta/interfaces/openai_streaming_interface.py +270 -250
- letta/llm_api/anthropic.py +3 -10
- letta/llm_api/openai_client.py +6 -1
- letta/orm/__init__.py +1 -0
- letta/orm/step.py +14 -0
- letta/orm/step_metrics.py +71 -0
- letta/schemas/enums.py +9 -0
- letta/schemas/llm_config.py +8 -6
- letta/schemas/providers/lmstudio.py +2 -2
- letta/schemas/providers/ollama.py +42 -54
- letta/schemas/providers/openai.py +1 -1
- letta/schemas/step.py +6 -0
- letta/schemas/step_metrics.py +23 -0
- letta/schemas/tool_rule.py +10 -29
- letta/services/step_manager.py +179 -1
- letta/services/tool_executor/builtin_tool_executor.py +4 -1
- letta/services/tool_executor/core_tool_executor.py +2 -10
- letta/services/tool_executor/files_tool_executor.py +89 -40
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/RECORD +30 -28
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/entry_points.txt +0 -0
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -120,260 +120,34 @@ class OpenAIStreamingInterface:
             tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
             self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
 
+        prev_message_type = None
+        message_index = 0
         try:
             async with stream:
-                prev_message_type = None
-                message_index = 0
                 async for chunk in stream:
-                    if not self.model or not self.message_id:
-                        self.model = chunk.model
-                        self.message_id = chunk.id
-
-                    # track usage
-                    if chunk.usage:
-                        self.input_tokens += chunk.usage.prompt_tokens
-                        self.output_tokens += chunk.usage.completion_tokens
-
-                    if chunk.choices:
-                        choice = chunk.choices[0]
-                        message_delta = choice.delta
-
-                        if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
-                            tool_call = message_delta.tool_calls[0]
-
-                            if tool_call.function.name:
-                                # If we're waiting for the first key, then we should hold back the name
-                                # ie add it to a buffer instead of returning it as a chunk
-                                if self.function_name_buffer is None:
-                                    self.function_name_buffer = tool_call.function.name
-                                else:
-                                    self.function_name_buffer += tool_call.function.name
-
-                            if tool_call.id:
-                                # Buffer until next time
-                                if self.function_id_buffer is None:
-                                    self.function_id_buffer = tool_call.id
-                                else:
-                                    self.function_id_buffer += tool_call.id
-
-                            if tool_call.function.arguments:
-                                # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
-                                self.current_function_arguments += tool_call.function.arguments
-                                updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
-                                    tool_call.function.arguments
-                                )
-
-                                if self.is_openai_proxy:
-                                    self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+                    try:
+                        async for message in self._process_chunk(chunk, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            yield message
+                    except asyncio.CancelledError as e:
+                        import traceback
+
+                        logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
+                        async for message in self._process_chunk(chunk, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            yield message
+
+                        # Don't raise the exception here
+                        continue
 
-                                # If we have inner thoughts, we should output them as a chunk
-                                if updates_inner_thoughts:
-                                    if prev_message_type and prev_message_type != "reasoning_message":
-                                        message_index += 1
-                                    self.reasoning_messages.append(updates_inner_thoughts)
-                                    reasoning_message = ReasoningMessage(
-                                        id=self.letta_message_id,
-                                        date=datetime.now(timezone.utc),
-                                        reasoning=updates_inner_thoughts,
-                                        # name=name,
-                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                    )
-                                    prev_message_type = reasoning_message.message_type
-                                    yield reasoning_message
-
-                                    # Additionally inner thoughts may stream back with a chunk of main JSON
-                                    # In that case, since we can only return a chunk at a time, we should buffer it
-                                    if updates_main_json:
-                                        if self.function_args_buffer is None:
-                                            self.function_args_buffer = updates_main_json
-                                        else:
-                                            self.function_args_buffer += updates_main_json
-
-                                # If we have main_json, we should output a ToolCallMessage
-                                elif updates_main_json:
-
-                                    # If there's something in the function_name buffer, we should release it first
-                                    # NOTE: we could output it as part of a chunk that has both name and args,
-                                    # however the frontend may expect name first, then args, so to be
-                                    # safe we'll output name first in a separate chunk
-                                    if self.function_name_buffer:
-
-                                        # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
-                                        if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
-
-                                            # Store the ID of the tool call so allow skipping the corresponding response
-                                            if self.function_id_buffer:
-                                                self.prev_assistant_message_id = self.function_id_buffer
-
-                                        else:
-                                            if prev_message_type and prev_message_type != "tool_call_message":
-                                                message_index += 1
-                                            self.tool_call_name = str(self.function_name_buffer)
-                                            tool_call_msg = ToolCallMessage(
-                                                id=self.letta_message_id,
-                                                date=datetime.now(timezone.utc),
-                                                tool_call=ToolCallDelta(
-                                                    name=self.function_name_buffer,
-                                                    arguments=None,
-                                                    tool_call_id=self.function_id_buffer,
-                                                ),
-                                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                            )
-                                            prev_message_type = tool_call_msg.message_type
-                                            yield tool_call_msg
-
-                                        # Record what the last function name we flushed was
-                                        self.last_flushed_function_name = self.function_name_buffer
-                                        if self.last_flushed_function_id is None:
-                                            self.last_flushed_function_id = self.function_id_buffer
-                                        # Clear the buffer
-                                        self.function_name_buffer = None
-                                        self.function_id_buffer = None
-                                        # Since we're clearing the name buffer, we should store
-                                        # any updates to the arguments inside a separate buffer
-
-                                        # Add any main_json updates to the arguments buffer
-                                        if self.function_args_buffer is None:
-                                            self.function_args_buffer = updates_main_json
-                                        else:
-                                            self.function_args_buffer += updates_main_json
-
-                                    # If there was nothing in the name buffer, we can proceed to
-                                    # output the arguments chunk as a ToolCallMessage
-                                    else:
-                                        # use_assistant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
-                                        if self.use_assistant_message and (
-                                            self.last_flushed_function_name is not None
-                                            and self.last_flushed_function_name == self.assistant_message_tool_name
-                                        ):
-                                            # do an additional parse on the updates_main_json
-                                            if self.function_args_buffer:
-                                                updates_main_json = self.function_args_buffer + updates_main_json
-                                                self.function_args_buffer = None
-
-                                            # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
-                                            match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
-                                            if updates_main_json == match_str:
-                                                updates_main_json = None
-
-                                            else:
-                                                # Some hardcoding to strip off the trailing "}"
-                                                if updates_main_json in ["}", '"}']:
-                                                    updates_main_json = None
-                                                if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                                    updates_main_json = updates_main_json[:-1]
-
-                                            if not updates_main_json:
-                                                # early exit to turn into content mode
-                                                continue
-
-                                            # There may be a buffer from a previous chunk, for example
-                                            # if the previous chunk had arguments but we needed to flush name
-                                            if self.function_args_buffer:
-                                                # In this case, we should release the buffer + new data at once
-                                                combined_chunk = self.function_args_buffer + updates_main_json
-
-                                                if prev_message_type and prev_message_type != "assistant_message":
-                                                    message_index += 1
-                                                assistant_message = AssistantMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    content=combined_chunk,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = assistant_message.message_type
-                                                yield assistant_message
-                                                # Store the ID of the tool call so allow skipping the corresponding response
-                                                if self.function_id_buffer:
-                                                    self.prev_assistant_message_id = self.function_id_buffer
-                                                # clear buffer
-                                                self.function_args_buffer = None
-                                                self.function_id_buffer = None
-
-                                            else:
-                                                # If there's no buffer to clear, just output a new chunk with new data
-                                                # TODO: THIS IS HORRIBLE
-                                                # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                                                # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                                                parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                                                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                                    self.assistant_message_tool_kwarg
-                                                ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                                    new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                                    prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                                    # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                                    diff = new_content.replace(prev_content, "", 1)
-                                                    self.current_json_parse_result = parsed_args
-                                                    if prev_message_type and prev_message_type != "assistant_message":
-                                                        message_index += 1
-                                                    assistant_message = AssistantMessage(
-                                                        id=self.letta_message_id,
-                                                        date=datetime.now(timezone.utc),
-                                                        content=diff,
-                                                        # name=name,
-                                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                    )
-                                                    prev_message_type = assistant_message.message_type
-                                                    yield assistant_message
-
-                                                # Store the ID of the tool call so allow skipping the corresponding response
-                                                if self.function_id_buffer:
-                                                    self.prev_assistant_message_id = self.function_id_buffer
-                                                # clear buffers
-                                                self.function_id_buffer = None
-                                        else:
-
-                                            # There may be a buffer from a previous chunk, for example
-                                            # if the previous chunk had arguments but we needed to flush name
-                                            if self.function_args_buffer:
-                                                # In this case, we should release the buffer + new data at once
-                                                combined_chunk = self.function_args_buffer + updates_main_json
-                                                if prev_message_type and prev_message_type != "tool_call_message":
-                                                    message_index += 1
-                                                tool_call_msg = ToolCallMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    tool_call=ToolCallDelta(
-                                                        name=self.function_name_buffer,
-                                                        arguments=combined_chunk,
-                                                        tool_call_id=self.function_id_buffer,
-                                                    ),
-                                                    # name=name,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = tool_call_msg.message_type
-                                                yield tool_call_msg
-                                                # clear buffer
-                                                self.function_args_buffer = None
-                                                self.function_id_buffer = None
-                                            else:
-                                                # If there's no buffer to clear, just output a new chunk with new data
-                                                if prev_message_type and prev_message_type != "tool_call_message":
-                                                    message_index += 1
-                                                tool_call_msg = ToolCallMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    tool_call=ToolCallDelta(
-                                                        name=None,
-                                                        arguments=updates_main_json,
-                                                        tool_call_id=self.function_id_buffer,
-                                                    ),
-                                                    # name=name,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = tool_call_msg.message_type
-                                                yield tool_call_msg
-                                                self.function_id_buffer = None
-        except asyncio.CancelledError as e:
-            import traceback
-
-            logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
-            raise e
         except Exception as e:
             import traceback
 
@@ -386,3 +160,249 @@ class OpenAIStreamingInterface:
             raise e
         finally:
             logger.info("OpenAIStreamingInterface: Stream processing complete.")
+
+    async def _process_chunk(
+        self,
+        chunk: ChatCompletionChunk,
+        ttft_span: Optional["Span"] = None,
+        prev_message_type: Optional[str] = None,
+        message_index: int = 0,
+    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
+        if not self.model or not self.message_id:
+            self.model = chunk.model
+            self.message_id = chunk.id
+
+        # track usage
+        if chunk.usage:
+            self.input_tokens += chunk.usage.prompt_tokens
+            self.output_tokens += chunk.usage.completion_tokens
+
+        if chunk.choices:
+            choice = chunk.choices[0]
+            message_delta = choice.delta
+
+            if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
+                tool_call = message_delta.tool_calls[0]
+
+                if tool_call.function.name:
+                    # If we're waiting for the first key, then we should hold back the name
+                    # ie add it to a buffer instead of returning it as a chunk
+                    if self.function_name_buffer is None:
+                        self.function_name_buffer = tool_call.function.name
+                    else:
+                        self.function_name_buffer += tool_call.function.name
+
+                if tool_call.id:
+                    # Buffer until next time
+                    if self.function_id_buffer is None:
+                        self.function_id_buffer = tool_call.id
+                    else:
+                        self.function_id_buffer += tool_call.id
+
+                if tool_call.function.arguments:
+                    # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+                    self.current_function_arguments += tool_call.function.arguments
+                    updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+
+                    if self.is_openai_proxy:
+                        self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+
+                    # If we have inner thoughts, we should output them as a chunk
+                    if updates_inner_thoughts:
+                        if prev_message_type and prev_message_type != "reasoning_message":
+                            message_index += 1
+                        self.reasoning_messages.append(updates_inner_thoughts)
+                        reasoning_message = ReasoningMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            reasoning=updates_inner_thoughts,
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
+                        prev_message_type = reasoning_message.message_type
+                        yield reasoning_message
+
+                        # Additionally inner thoughts may stream back with a chunk of main JSON
+                        # In that case, since we can only return a chunk at a time, we should buffer it
+                        if updates_main_json:
+                            if self.function_args_buffer is None:
+                                self.function_args_buffer = updates_main_json
+                            else:
+                                self.function_args_buffer += updates_main_json
+
+                    # If we have main_json, we should output a ToolCallMessage
+                    elif updates_main_json:
+
+                        # If there's something in the function_name buffer, we should release it first
+                        # NOTE: we could output it as part of a chunk that has both name and args,
+                        # however the frontend may expect name first, then args, so to be
+                        # safe we'll output name first in a separate chunk
+                        if self.function_name_buffer:
+
+                            # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
+                            if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
+
+                                # Store the ID of the tool call so allow skipping the corresponding response
+                                if self.function_id_buffer:
+                                    self.prev_assistant_message_id = self.function_id_buffer
+
+                            else:
+                                if prev_message_type and prev_message_type != "tool_call_message":
+                                    message_index += 1
+                                self.tool_call_name = str(self.function_name_buffer)
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=None,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
+                                prev_message_type = tool_call_msg.message_type
+                                yield tool_call_msg
+
+                            # Record what the last function name we flushed was
+                            self.last_flushed_function_name = self.function_name_buffer
+                            if self.last_flushed_function_id is None:
+                                self.last_flushed_function_id = self.function_id_buffer
+                            # Clear the buffer
+                            self.function_name_buffer = None
+                            self.function_id_buffer = None
+                            # Since we're clearing the name buffer, we should store
+                            # any updates to the arguments inside a separate buffer
+
+                            # Add any main_json updates to the arguments buffer
+                            if self.function_args_buffer is None:
+                                self.function_args_buffer = updates_main_json
+                            else:
+                                self.function_args_buffer += updates_main_json
+
+                        # If there was nothing in the name buffer, we can proceed to
+                        # output the arguments chunk as a ToolCallMessage
+                        else:
+                            # use_assistant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
+                            if self.use_assistant_message and (
+                                self.last_flushed_function_name is not None
+                                and self.last_flushed_function_name == self.assistant_message_tool_name
+                            ):
+                                # do an additional parse on the updates_main_json
+                                if self.function_args_buffer:
+                                    updates_main_json = self.function_args_buffer + updates_main_json
+                                    self.function_args_buffer = None
+
+                                # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
+                                match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
+                                if updates_main_json == match_str:
+                                    updates_main_json = None
+
+                                else:
+                                    # Some hardcoding to strip off the trailing "}"
+                                    if updates_main_json in ["}", '"}']:
+                                        updates_main_json = None
+                                    if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
+                                        updates_main_json = updates_main_json[:-1]
+
+                                if not updates_main_json:
+                                    # early exit to turn into content mode
+                                    pass
+
+                                # There may be a buffer from a previous chunk, for example
+                                # if the previous chunk had arguments but we needed to flush name
+                                if self.function_args_buffer:
+                                    # In this case, we should release the buffer + new data at once
+                                    combined_chunk = self.function_args_buffer + updates_main_json
+
+                                    if prev_message_type and prev_message_type != "assistant_message":
+                                        message_index += 1
+                                    assistant_message = AssistantMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        content=combined_chunk,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = assistant_message.message_type
+                                    yield assistant_message
+                                    # Store the ID of the tool call so allow skipping the corresponding response
+                                    if self.function_id_buffer:
+                                        self.prev_assistant_message_id = self.function_id_buffer
+                                    # clear buffer
+                                    self.function_args_buffer = None
+                                    self.function_id_buffer = None
+
+                                else:
+                                    # If there's no buffer to clear, just output a new chunk with new data
+                                    # TODO: THIS IS HORRIBLE
+                                    # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
+                                    # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
+                                    parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
+
+                                    if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                                        self.assistant_message_tool_kwarg
+                                    ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                                        new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                                        prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                                        # TODO: Assumes consistent state and that prev_content is subset of new_content
+                                        diff = new_content.replace(prev_content, "", 1)
+                                        self.current_json_parse_result = parsed_args
+                                        if prev_message_type and prev_message_type != "assistant_message":
+                                            message_index += 1
+                                        assistant_message = AssistantMessage(
+                                            id=self.letta_message_id,
+                                            date=datetime.now(timezone.utc),
+                                            content=diff,
+                                            # name=name,
+                                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                        )
+                                        prev_message_type = assistant_message.message_type
+                                        yield assistant_message
+
+                                    # Store the ID of the tool call so allow skipping the corresponding response
+                                    if self.function_id_buffer:
+                                        self.prev_assistant_message_id = self.function_id_buffer
+                                    # clear buffers
+                                    self.function_id_buffer = None
+                            else:
+
+                                # There may be a buffer from a previous chunk, for example
+                                # if the previous chunk had arguments but we needed to flush name
+                                if self.function_args_buffer:
+                                    # In this case, we should release the buffer + new data at once
+                                    combined_chunk = self.function_args_buffer + updates_main_json
+                                    if prev_message_type and prev_message_type != "tool_call_message":
+                                        message_index += 1
+                                    tool_call_msg = ToolCallMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        tool_call=ToolCallDelta(
+                                            name=self.function_name_buffer,
+                                            arguments=combined_chunk,
+                                            tool_call_id=self.function_id_buffer,
+                                        ),
+                                        # name=name,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = tool_call_msg.message_type
+                                    yield tool_call_msg
+                                    # clear buffer
+                                    self.function_args_buffer = None
+                                    self.function_id_buffer = None
+                                else:
+                                    # If there's no buffer to clear, just output a new chunk with new data
+                                    if prev_message_type and prev_message_type != "tool_call_message":
+                                        message_index += 1
+                                    tool_call_msg = ToolCallMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        tool_call=ToolCallDelta(
+                                            name=None,
+                                            arguments=updates_main_json,
+                                            tool_call_id=self.function_id_buffer,
+                                        ),
+                                        # name=name,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = tool_call_msg.message_type
+                                    yield tool_call_msg
+                                    self.function_id_buffer = None
letta/llm_api/anthropic.py
CHANGED
@@ -55,19 +55,12 @@ BASE_URL = "https://api.anthropic.com/v1"
 # https://docs.anthropic.com/claude/docs/models-overview
 # Sadly hardcoded
 MODEL_LIST = [
+    ## Opus 4.1
     {
-        "name": "claude-opus-4-20250514",
-        "context_window": 200000,
-    },
-    {
-        "name": "claude-sonnet-4-20250514",
-        "context_window": 200000,
-    },
-    {
-        "name": "claude-3-5-haiku-20241022",
+        "name": "claude-opus-4-1-20250805",
        "context_window": 200000,
     },
-    ## Opus
+    ## Opus 3
     {
         "name": "claude-3-opus-20240229",
         "context_window": 200000,
letta/llm_api/openai_client.py
CHANGED
@@ -49,6 +49,11 @@ def is_openai_reasoning_model(model: str) -> bool:
     return is_reasoning
 
 
+def is_openai_5_model(model: str) -> bool:
+    """Utility function to check if the model is a '5' model"""
+    return model.startswith("gpt-5")
+
+
 def accepts_developer_role(model: str) -> bool:
     """Checks if the model accepts the 'developer' role. Note that not all reasoning models accept this role.
 
@@ -65,7 +70,7 @@ def supports_temperature_param(model: str) -> bool:
 
     Example error: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_parameter'}}
     """
-    if is_openai_reasoning_model(model):
+    if is_openai_reasoning_model(model) or is_openai_5_model(model):
        return False
     else:
         return True
letta/orm/__init__.py
CHANGED
@@ -29,6 +29,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, Sa
 from letta.orm.source import Source
 from letta.orm.sources_agents import SourcesAgents
 from letta.orm.step import Step
+from letta.orm.step_metrics import StepMetrics
 from letta.orm.tool import Tool
 from letta.orm.tools_agents import ToolsAgents
 from letta.orm.user import User
letta/orm/step.py
CHANGED
@@ -6,12 +6,16 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from letta.orm.mixins import ProjectMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
+from letta.schemas.enums import StepStatus
 from letta.schemas.letta_stop_reason import StopReasonType
 from letta.schemas.step import Step as PydanticStep
 
 if TYPE_CHECKING:
     from letta.orm.job import Job
+    from letta.orm.message import Message
+    from letta.orm.organization import Organization
     from letta.orm.provider import Provider
+    from letta.orm.step_metrics import StepMetrics
 
 
 class Step(SqlalchemyBase, ProjectMixin):
@@ -55,6 +59,13 @@ class Step(SqlalchemyBase, ProjectMixin):
         None, nullable=True, doc="The feedback for this step. Must be either 'positive' or 'negative'."
     )
 
+    # error handling
+    error_type: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The type/class of the error that occurred")
+    error_data: Mapped[Optional[Dict]] = mapped_column(
+        JSON, nullable=True, doc="Error details including message, traceback, and additional context"
+    )
+    status: Mapped[Optional[StepStatus]] = mapped_column(None, nullable=True, doc="Step status: pending, success, or failed")
+
     # Relationships (foreign keys)
     organization: Mapped[Optional["Organization"]] = relationship("Organization")
     provider: Mapped[Optional["Provider"]] = relationship("Provider")
@@ -62,3 +73,6 @@ class Step(SqlalchemyBase, ProjectMixin):
 
     # Relationships (backrefs)
     messages: Mapped[List["Message"]] = relationship("Message", back_populates="step", cascade="save-update", lazy="noload")
+    metrics: Mapped[Optional["StepMetrics"]] = relationship(
+        "StepMetrics", back_populates="step", cascade="all, delete-orphan", lazy="noload", uselist=False
+    )