letta-nightly 0.11.0.dev20250807000848__py3-none-any.whl → 0.11.0.dev20250808055434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. letta/agent.py +2 -1
  2. letta/agents/letta_agent.py +215 -143
  3. letta/functions/function_sets/base.py +2 -2
  4. letta/functions/function_sets/files.py +22 -9
  5. letta/interfaces/anthropic_streaming_interface.py +291 -265
  6. letta/interfaces/openai_streaming_interface.py +270 -250
  7. letta/llm_api/anthropic.py +3 -10
  8. letta/llm_api/openai_client.py +6 -1
  9. letta/orm/__init__.py +1 -0
  10. letta/orm/step.py +14 -0
  11. letta/orm/step_metrics.py +71 -0
  12. letta/schemas/enums.py +9 -0
  13. letta/schemas/llm_config.py +8 -6
  14. letta/schemas/providers/lmstudio.py +2 -2
  15. letta/schemas/providers/openai.py +1 -1
  16. letta/schemas/step.py +6 -0
  17. letta/schemas/step_metrics.py +23 -0
  18. letta/schemas/tool_rule.py +10 -29
  19. letta/services/step_manager.py +179 -1
  20. letta/services/tool_executor/builtin_tool_executor.py +4 -1
  21. letta/services/tool_executor/core_tool_executor.py +2 -10
  22. letta/services/tool_executor/files_tool_executor.py +89 -40
  23. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/METADATA +1 -1
  24. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/RECORD +27 -25
  25. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/LICENSE +0 -0
  26. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/WHEEL +0 -0
  27. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/entry_points.txt +0 -0
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -120,260 +120,34 @@ class OpenAIStreamingInterface:
             tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
             self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
 
+        prev_message_type = None
+        message_index = 0
         try:
             async with stream:
-                prev_message_type = None
-                message_index = 0
                 async for chunk in stream:
-                    if not self.model or not self.message_id:
-                        self.model = chunk.model
-                        self.message_id = chunk.id
-
-                    # track usage
-                    if chunk.usage:
-                        self.input_tokens += chunk.usage.prompt_tokens
-                        self.output_tokens += chunk.usage.completion_tokens
-
-                    if chunk.choices:
-                        choice = chunk.choices[0]
-                        message_delta = choice.delta
-
-                        if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
-                            tool_call = message_delta.tool_calls[0]
-
-                            if tool_call.function.name:
-                                # If we're waiting for the first key, then we should hold back the name
-                                # ie add it to a buffer instead of returning it as a chunk
-                                if self.function_name_buffer is None:
-                                    self.function_name_buffer = tool_call.function.name
-                                else:
-                                    self.function_name_buffer += tool_call.function.name
-
-                            if tool_call.id:
-                                # Buffer until next time
-                                if self.function_id_buffer is None:
-                                    self.function_id_buffer = tool_call.id
-                                else:
-                                    self.function_id_buffer += tool_call.id
-
-                            if tool_call.function.arguments:
-                                # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
-                                self.current_function_arguments += tool_call.function.arguments
-                                updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
-                                    tool_call.function.arguments
-                                )
-
-                                if self.is_openai_proxy:
-                                    self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+                    try:
+                        async for message in self._process_chunk(chunk, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            yield message
+                    except asyncio.CancelledError as e:
+                        import traceback
+
+                        logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
+                        async for message in self._process_chunk(chunk, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            yield message
+
+                        # Don't raise the exception here
+                        continue
 
-                                # If we have inner thoughts, we should output them as a chunk
-                                if updates_inner_thoughts:
-                                    if prev_message_type and prev_message_type != "reasoning_message":
-                                        message_index += 1
-                                    self.reasoning_messages.append(updates_inner_thoughts)
-                                    reasoning_message = ReasoningMessage(
-                                        id=self.letta_message_id,
-                                        date=datetime.now(timezone.utc),
-                                        reasoning=updates_inner_thoughts,
-                                        # name=name,
-                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                    )
-                                    prev_message_type = reasoning_message.message_type
-                                    yield reasoning_message
-
-                                    # Additionally inner thoughts may stream back with a chunk of main JSON
-                                    # In that case, since we can only return a chunk at a time, we should buffer it
-                                    if updates_main_json:
-                                        if self.function_args_buffer is None:
-                                            self.function_args_buffer = updates_main_json
-                                        else:
-                                            self.function_args_buffer += updates_main_json
-
-                                # If we have main_json, we should output a ToolCallMessage
-                                elif updates_main_json:
-
-                                    # If there's something in the function_name buffer, we should release it first
-                                    # NOTE: we could output it as part of a chunk that has both name and args,
-                                    # however the frontend may expect name first, then args, so to be
-                                    # safe we'll output name first in a separate chunk
-                                    if self.function_name_buffer:
-
-                                        # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
-                                        if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
-
-                                            # Store the ID of the tool call so allow skipping the corresponding response
-                                            if self.function_id_buffer:
-                                                self.prev_assistant_message_id = self.function_id_buffer
-
-                                        else:
-                                            if prev_message_type and prev_message_type != "tool_call_message":
-                                                message_index += 1
-                                            self.tool_call_name = str(self.function_name_buffer)
-                                            tool_call_msg = ToolCallMessage(
-                                                id=self.letta_message_id,
-                                                date=datetime.now(timezone.utc),
-                                                tool_call=ToolCallDelta(
-                                                    name=self.function_name_buffer,
-                                                    arguments=None,
-                                                    tool_call_id=self.function_id_buffer,
-                                                ),
-                                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                            )
-                                            prev_message_type = tool_call_msg.message_type
-                                            yield tool_call_msg
-
-                                        # Record what the last function name we flushed was
-                                        self.last_flushed_function_name = self.function_name_buffer
-                                        if self.last_flushed_function_id is None:
-                                            self.last_flushed_function_id = self.function_id_buffer
-                                        # Clear the buffer
-                                        self.function_name_buffer = None
-                                        self.function_id_buffer = None
-                                        # Since we're clearing the name buffer, we should store
-                                        # any updates to the arguments inside a separate buffer
-
-                                        # Add any main_json updates to the arguments buffer
-                                        if self.function_args_buffer is None:
-                                            self.function_args_buffer = updates_main_json
-                                        else:
-                                            self.function_args_buffer += updates_main_json
-
-                                    # If there was nothing in the name buffer, we can proceed to
-                                    # output the arguments chunk as a ToolCallMessage
-                                    else:
-                                        # use_assistant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
-                                        if self.use_assistant_message and (
-                                            self.last_flushed_function_name is not None
-                                            and self.last_flushed_function_name == self.assistant_message_tool_name
-                                        ):
-                                            # do an additional parse on the updates_main_json
-                                            if self.function_args_buffer:
-                                                updates_main_json = self.function_args_buffer + updates_main_json
-                                                self.function_args_buffer = None
-
-                                                # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
-                                                match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
-                                                if updates_main_json == match_str:
-                                                    updates_main_json = None
-
-                                            else:
-                                                # Some hardcoding to strip off the trailing "}"
-                                                if updates_main_json in ["}", '"}']:
-                                                    updates_main_json = None
-                                                if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                                    updates_main_json = updates_main_json[:-1]
-
-                                            if not updates_main_json:
-                                                # early exit to turn into content mode
-                                                continue
-
-                                            # There may be a buffer from a previous chunk, for example
-                                            # if the previous chunk had arguments but we needed to flush name
-                                            if self.function_args_buffer:
-                                                # In this case, we should release the buffer + new data at once
-                                                combined_chunk = self.function_args_buffer + updates_main_json
-
-                                                if prev_message_type and prev_message_type != "assistant_message":
-                                                    message_index += 1
-                                                assistant_message = AssistantMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    content=combined_chunk,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = assistant_message.message_type
-                                                yield assistant_message
-                                                # Store the ID of the tool call so allow skipping the corresponding response
-                                                if self.function_id_buffer:
-                                                    self.prev_assistant_message_id = self.function_id_buffer
-                                                # clear buffer
-                                                self.function_args_buffer = None
-                                                self.function_id_buffer = None
-
-                                            else:
-                                                # If there's no buffer to clear, just output a new chunk with new data
-                                                # TODO: THIS IS HORRIBLE
-                                                # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                                                # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                                                parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                                                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                                    self.assistant_message_tool_kwarg
-                                                ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                                    new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                                    prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                                    # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                                    diff = new_content.replace(prev_content, "", 1)
-                                                    self.current_json_parse_result = parsed_args
-                                                    if prev_message_type and prev_message_type != "assistant_message":
-                                                        message_index += 1
-                                                    assistant_message = AssistantMessage(
-                                                        id=self.letta_message_id,
-                                                        date=datetime.now(timezone.utc),
-                                                        content=diff,
-                                                        # name=name,
-                                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                    )
-                                                    prev_message_type = assistant_message.message_type
-                                                    yield assistant_message
-
-                                                # Store the ID of the tool call so allow skipping the corresponding response
-                                                if self.function_id_buffer:
-                                                    self.prev_assistant_message_id = self.function_id_buffer
-                                                # clear buffers
-                                                self.function_id_buffer = None
-                                        else:
-
-                                            # There may be a buffer from a previous chunk, for example
-                                            # if the previous chunk had arguments but we needed to flush name
-                                            if self.function_args_buffer:
-                                                # In this case, we should release the buffer + new data at once
-                                                combined_chunk = self.function_args_buffer + updates_main_json
-                                                if prev_message_type and prev_message_type != "tool_call_message":
-                                                    message_index += 1
-                                                tool_call_msg = ToolCallMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    tool_call=ToolCallDelta(
-                                                        name=self.function_name_buffer,
-                                                        arguments=combined_chunk,
-                                                        tool_call_id=self.function_id_buffer,
-                                                    ),
-                                                    # name=name,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = tool_call_msg.message_type
-                                                yield tool_call_msg
-                                                # clear buffer
-                                                self.function_args_buffer = None
-                                                self.function_id_buffer = None
-                                            else:
-                                                # If there's no buffer to clear, just output a new chunk with new data
-                                                if prev_message_type and prev_message_type != "tool_call_message":
-                                                    message_index += 1
-                                                tool_call_msg = ToolCallMessage(
-                                                    id=self.letta_message_id,
-                                                    date=datetime.now(timezone.utc),
-                                                    tool_call=ToolCallDelta(
-                                                        name=None,
-                                                        arguments=updates_main_json,
-                                                        tool_call_id=self.function_id_buffer,
-                                                    ),
-                                                    # name=name,
-                                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                                                )
-                                                prev_message_type = tool_call_msg.message_type
-                                                yield tool_call_msg
-                                                self.function_id_buffer = None
-            except asyncio.CancelledError as e:
-                import traceback
-
-                logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
-                ttft_span.add_event(
-                    name="stop_reason",
-                    attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
-                )
-                raise e
         except Exception as e:
             import traceback
 
@@ -386,3 +160,249 @@ class OpenAIStreamingInterface:
             raise e
         finally:
             logger.info("OpenAIStreamingInterface: Stream processing complete.")
+
+    async def _process_chunk(
+        self,
+        chunk: ChatCompletionChunk,
+        ttft_span: Optional["Span"] = None,
+        prev_message_type: Optional[str] = None,
+        message_index: int = 0,
+    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
+        if not self.model or not self.message_id:
+            self.model = chunk.model
+            self.message_id = chunk.id
+
+        # track usage
+        if chunk.usage:
+            self.input_tokens += chunk.usage.prompt_tokens
+            self.output_tokens += chunk.usage.completion_tokens
+
+        if chunk.choices:
+            choice = chunk.choices[0]
+            message_delta = choice.delta
+
+            if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
+                tool_call = message_delta.tool_calls[0]
+
+                if tool_call.function.name:
+                    # If we're waiting for the first key, then we should hold back the name
+                    # ie add it to a buffer instead of returning it as a chunk
+                    if self.function_name_buffer is None:
+                        self.function_name_buffer = tool_call.function.name
+                    else:
+                        self.function_name_buffer += tool_call.function.name
+
+                if tool_call.id:
+                    # Buffer until next time
+                    if self.function_id_buffer is None:
+                        self.function_id_buffer = tool_call.id
+                    else:
+                        self.function_id_buffer += tool_call.id
+
+                if tool_call.function.arguments:
+                    # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+                    self.current_function_arguments += tool_call.function.arguments
+                    updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+
+                    if self.is_openai_proxy:
+                        self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+
+                    # If we have inner thoughts, we should output them as a chunk
+                    if updates_inner_thoughts:
+                        if prev_message_type and prev_message_type != "reasoning_message":
+                            message_index += 1
+                        self.reasoning_messages.append(updates_inner_thoughts)
+                        reasoning_message = ReasoningMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            reasoning=updates_inner_thoughts,
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
+                        prev_message_type = reasoning_message.message_type
+                        yield reasoning_message
+
+                        # Additionally inner thoughts may stream back with a chunk of main JSON
+                        # In that case, since we can only return a chunk at a time, we should buffer it
+                        if updates_main_json:
+                            if self.function_args_buffer is None:
+                                self.function_args_buffer = updates_main_json
+                            else:
+                                self.function_args_buffer += updates_main_json
+
+                    # If we have main_json, we should output a ToolCallMessage
+                    elif updates_main_json:
+
+                        # If there's something in the function_name buffer, we should release it first
+                        # NOTE: we could output it as part of a chunk that has both name and args,
+                        # however the frontend may expect name first, then args, so to be
+                        # safe we'll output name first in a separate chunk
+                        if self.function_name_buffer:
+
+                            # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
+                            if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
+
+                                # Store the ID of the tool call so allow skipping the corresponding response
+                                if self.function_id_buffer:
+                                    self.prev_assistant_message_id = self.function_id_buffer
+
+                            else:
+                                if prev_message_type and prev_message_type != "tool_call_message":
+                                    message_index += 1
+                                self.tool_call_name = str(self.function_name_buffer)
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=None,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
+                                prev_message_type = tool_call_msg.message_type
+                                yield tool_call_msg
+
+                            # Record what the last function name we flushed was
+                            self.last_flushed_function_name = self.function_name_buffer
+                            if self.last_flushed_function_id is None:
+                                self.last_flushed_function_id = self.function_id_buffer
+                            # Clear the buffer
+                            self.function_name_buffer = None
+                            self.function_id_buffer = None
+                            # Since we're clearing the name buffer, we should store
+                            # any updates to the arguments inside a separate buffer
+
+                            # Add any main_json updates to the arguments buffer
+                            if self.function_args_buffer is None:
+                                self.function_args_buffer = updates_main_json
+                            else:
+                                self.function_args_buffer += updates_main_json
+
+                        # If there was nothing in the name buffer, we can proceed to
+                        # output the arguments chunk as a ToolCallMessage
+                        else:
+                            # use_assistant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
+                            if self.use_assistant_message and (
+                                self.last_flushed_function_name is not None
+                                and self.last_flushed_function_name == self.assistant_message_tool_name
+                            ):
+                                # do an additional parse on the updates_main_json
+                                if self.function_args_buffer:
+                                    updates_main_json = self.function_args_buffer + updates_main_json
+                                    self.function_args_buffer = None
+
+                                    # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
+                                    match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
+                                    if updates_main_json == match_str:
+                                        updates_main_json = None
+
+                                else:
+                                    # Some hardcoding to strip off the trailing "}"
+                                    if updates_main_json in ["}", '"}']:
+                                        updates_main_json = None
+                                    if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
+                                        updates_main_json = updates_main_json[:-1]
+
+                                if not updates_main_json:
+                                    # early exit to turn into content mode
+                                    pass
+
+                                # There may be a buffer from a previous chunk, for example
+                                # if the previous chunk had arguments but we needed to flush name
+                                if self.function_args_buffer:
+                                    # In this case, we should release the buffer + new data at once
+                                    combined_chunk = self.function_args_buffer + updates_main_json
+
+                                    if prev_message_type and prev_message_type != "assistant_message":
+                                        message_index += 1
+                                    assistant_message = AssistantMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        content=combined_chunk,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = assistant_message.message_type
+                                    yield assistant_message
+                                    # Store the ID of the tool call so allow skipping the corresponding response
+                                    if self.function_id_buffer:
+                                        self.prev_assistant_message_id = self.function_id_buffer
+                                    # clear buffer
+                                    self.function_args_buffer = None
+                                    self.function_id_buffer = None
+
+                                else:
+                                    # If there's no buffer to clear, just output a new chunk with new data
+                                    # TODO: THIS IS HORRIBLE
+                                    # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
+                                    # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
+                                    parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
+
+                                    if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                                        self.assistant_message_tool_kwarg
+                                    ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                                        new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                                        prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                                        # TODO: Assumes consistent state and that prev_content is subset of new_content
+                                        diff = new_content.replace(prev_content, "", 1)
+                                        self.current_json_parse_result = parsed_args
+                                        if prev_message_type and prev_message_type != "assistant_message":
+                                            message_index += 1
+                                        assistant_message = AssistantMessage(
+                                            id=self.letta_message_id,
+                                            date=datetime.now(timezone.utc),
+                                            content=diff,
+                                            # name=name,
+                                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                        )
+                                        prev_message_type = assistant_message.message_type
+                                        yield assistant_message
+
+                                    # Store the ID of the tool call so allow skipping the corresponding response
+                                    if self.function_id_buffer:
+                                        self.prev_assistant_message_id = self.function_id_buffer
+                                    # clear buffers
+                                    self.function_id_buffer = None
+                            else:
+
+                                # There may be a buffer from a previous chunk, for example
+                                # if the previous chunk had arguments but we needed to flush name
+                                if self.function_args_buffer:
+                                    # In this case, we should release the buffer + new data at once
+                                    combined_chunk = self.function_args_buffer + updates_main_json
+                                    if prev_message_type and prev_message_type != "tool_call_message":
+                                        message_index += 1
+                                    tool_call_msg = ToolCallMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        tool_call=ToolCallDelta(
+                                            name=self.function_name_buffer,
+                                            arguments=combined_chunk,
+                                            tool_call_id=self.function_id_buffer,
+                                        ),
+                                        # name=name,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = tool_call_msg.message_type
+                                    yield tool_call_msg
+                                    # clear buffer
+                                    self.function_args_buffer = None
+                                    self.function_id_buffer = None
+                                else:
+                                    # If there's no buffer to clear, just output a new chunk with new data
+                                    if prev_message_type and prev_message_type != "tool_call_message":
+                                        message_index += 1
+                                    tool_call_msg = ToolCallMessage(
+                                        id=self.letta_message_id,
+                                        date=datetime.now(timezone.utc),
+                                        tool_call=ToolCallDelta(
+                                            name=None,
+                                            arguments=updates_main_json,
+                                            tool_call_id=self.function_id_buffer,
+                                        ),
+                                        # name=name,
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                    )
+                                    prev_message_type = tool_call_msg.message_type
+                                    yield tool_call_msg
+                                    self.function_id_buffer = None
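
Note: the refactor above moves per-chunk handling into `_process_chunk` so the outer loop can catch an `asyncio.CancelledError` raised while one chunk is being processed, re-drain that chunk, and keep consuming the stream instead of aborting (the old code re-raised instead). A minimal runnable sketch of that pattern, assuming stand-in names (`fake_stream`, `process_chunk` are illustrative, not letta APIs):

```python
import asyncio
from typing import AsyncGenerator


async def fake_stream() -> AsyncGenerator[str, None]:
    # Stand-in for an OpenAI chat-completions stream.
    for chunk in ("hel", "lo ", "wor", "ld"):
        yield chunk


async def process_chunk(chunk: str) -> AsyncGenerator[str, None]:
    # Stand-in for _process_chunk: one raw chunk -> zero or more messages.
    yield chunk.upper()


async def consume() -> None:
    async for chunk in fake_stream():
        try:
            async for message in process_chunk(chunk):
                print(message)
        except asyncio.CancelledError:
            # Mirror the diff: re-drain this chunk instead of propagating,
            # then continue with the rest of the stream.
            async for message in process_chunk(chunk):
                print(message)
            continue


asyncio.run(consume())
```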
letta/llm_api/anthropic.py CHANGED
@@ -55,19 +55,12 @@ BASE_URL = "https://api.anthropic.com/v1"
 # https://docs.anthropic.com/claude/docs/models-overview
 # Sadly hardcoded
 MODEL_LIST = [
+    ## Opus 4.1
     {
-        "name": "claude-opus-4-20250514",
-        "context_window": 200000,
-    },
-    {
-        "name": "claude-sonnet-4-20250514",
-        "context_window": 200000,
-    },
-    {
-        "name": "claude-3-5-haiku-20241022",
+        "name": "claude-opus-4-1-20250805",
         "context_window": 200000,
     },
-    ## Opus
+    ## Opus 3
     {
         "name": "claude-3-opus-20240229",
         "context_window": 200000,
letta/llm_api/openai_client.py CHANGED
@@ -49,6 +49,11 @@ def is_openai_reasoning_model(model: str) -> bool:
     return is_reasoning
 
 
+def is_openai_5_model(model: str) -> bool:
+    """Utility function to check if the model is a '5' model"""
+    return model.startswith("gpt-5")
+
+
 def accepts_developer_role(model: str) -> bool:
     """Checks if the model accepts the 'developer' role. Note that not all reasoning models accept this role.
 
@@ -65,7 +70,7 @@ def supports_temperature_param(model: str) -> bool:
 
     Example error: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_parameter'}}
     """
-    if is_openai_reasoning_model(model):
+    if is_openai_reasoning_model(model) or is_openai_5_model(model):
         return False
     else:
         return True
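
Note: the net effect is that `temperature` is now treated as unsupported for gpt-5 models as well as reasoning models. A hedged caller-side sketch of how such a check might gate the request payload (`build_request` and the simplified reasoning-model check are illustrative assumptions, not code from this diff):

```python
def is_openai_reasoning_model(model: str) -> bool:
    # Simplified stand-in for the real check in openai_client.py.
    return model.startswith("o1") or model.startswith("o3")


def is_openai_5_model(model: str) -> bool:
    """Utility function to check if the model is a '5' model"""
    return model.startswith("gpt-5")


def supports_temperature_param(model: str) -> bool:
    return not (is_openai_reasoning_model(model) or is_openai_5_model(model))


def build_request(model: str, temperature: float = 0.7) -> dict:
    # Hypothetical helper: only include temperature when the model accepts it.
    payload = {"model": model, "messages": []}
    if supports_temperature_param(model):
        payload["temperature"] = temperature
    return payload


assert "temperature" not in build_request("gpt-5-mini")
assert "temperature" in build_request("gpt-4o")
```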
letta/orm/__init__.py CHANGED
@@ -29,6 +29,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, Sa
 from letta.orm.source import Source
 from letta.orm.sources_agents import SourcesAgents
 from letta.orm.step import Step
+from letta.orm.step_metrics import StepMetrics
 from letta.orm.tool import Tool
 from letta.orm.tools_agents import ToolsAgents
 from letta.orm.user import User
letta/orm/step.py CHANGED
@@ -6,12 +6,16 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from letta.orm.mixins import ProjectMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
+from letta.schemas.enums import StepStatus
 from letta.schemas.letta_stop_reason import StopReasonType
 from letta.schemas.step import Step as PydanticStep
 
 if TYPE_CHECKING:
     from letta.orm.job import Job
+    from letta.orm.message import Message
+    from letta.orm.organization import Organization
     from letta.orm.provider import Provider
+    from letta.orm.step_metrics import StepMetrics
 
 
 class Step(SqlalchemyBase, ProjectMixin):
@@ -55,6 +59,13 @@ class Step(SqlalchemyBase, ProjectMixin):
         None, nullable=True, doc="The feedback for this step. Must be either 'positive' or 'negative'."
     )
 
+    # error handling
+    error_type: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The type/class of the error that occurred")
+    error_data: Mapped[Optional[Dict]] = mapped_column(
+        JSON, nullable=True, doc="Error details including message, traceback, and additional context"
+    )
+    status: Mapped[Optional[StepStatus]] = mapped_column(None, nullable=True, doc="Step status: pending, success, or failed")
+
     # Relationships (foreign keys)
     organization: Mapped[Optional["Organization"]] = relationship("Organization")
     provider: Mapped[Optional["Provider"]] = relationship("Provider")
@@ -62,3 +73,6 @@ class Step(SqlalchemyBase, ProjectMixin):
 
     # Relationships (backrefs)
     messages: Mapped[List["Message"]] = relationship("Message", back_populates="step", cascade="save-update", lazy="noload")
+    metrics: Mapped[Optional["StepMetrics"]] = relationship(
+        "StepMetrics", back_populates="step", cascade="all, delete-orphan", lazy="noload", uselist=False
+    )
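
Note: the new `status`, `error_type`, and `error_data` columns let a step record how it failed. A hedged sketch of how they might be populated; the `StepStatus` enum shape is inferred from the column doc ("pending, success, or failed"), and `record_step_failure` plus the `SimpleNamespace` stand-in are illustrative, not letta API:

```python
import traceback
from enum import Enum
from types import SimpleNamespace


class StepStatus(str, Enum):  # assumed shape of letta.schemas.enums.StepStatus
    PENDING = "pending"
    SUCCESS = "success"
    FAILED = "failed"


def record_step_failure(step, exc: Exception) -> None:
    # Populate the three new columns from an in-flight exception.
    step.status = StepStatus.FAILED
    step.error_type = type(exc).__name__
    step.error_data = {
        "message": str(exc),
        "traceback": traceback.format_exc(),
    }


step = SimpleNamespace()  # stand-in for an ORM Step row
try:
    raise ValueError("tool call failed")
except ValueError as e:
    record_step_failure(step, e)
print(step.status, step.error_type)
```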