letta-nightly 0.11.0.dev20250807000848__py3-none-any.whl → 0.11.0.dev20250808055434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. letta/agent.py +2 -1
  2. letta/agents/letta_agent.py +215 -143
  3. letta/functions/function_sets/base.py +2 -2
  4. letta/functions/function_sets/files.py +22 -9
  5. letta/interfaces/anthropic_streaming_interface.py +291 -265
  6. letta/interfaces/openai_streaming_interface.py +270 -250
  7. letta/llm_api/anthropic.py +3 -10
  8. letta/llm_api/openai_client.py +6 -1
  9. letta/orm/__init__.py +1 -0
  10. letta/orm/step.py +14 -0
  11. letta/orm/step_metrics.py +71 -0
  12. letta/schemas/enums.py +9 -0
  13. letta/schemas/llm_config.py +8 -6
  14. letta/schemas/providers/lmstudio.py +2 -2
  15. letta/schemas/providers/openai.py +1 -1
  16. letta/schemas/step.py +6 -0
  17. letta/schemas/step_metrics.py +23 -0
  18. letta/schemas/tool_rule.py +10 -29
  19. letta/services/step_manager.py +179 -1
  20. letta/services/tool_executor/builtin_tool_executor.py +4 -1
  21. letta/services/tool_executor/core_tool_executor.py +2 -10
  22. letta/services/tool_executor/files_tool_executor.py +89 -40
  23. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/METADATA +1 -1
  24. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/RECORD +27 -25
  25. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/LICENSE +0 -0
  26. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/WHEEL +0 -0
  27. {letta_nightly-0.11.0.dev20250807000848.dist-info → letta_nightly-0.11.0.dev20250808055434.dist-info}/entry_points.txt +0 -0
@@ -126,271 +126,6 @@ class AnthropicStreamingInterface:
126
126
  logger.error("Error checking inner thoughts: %s", e)
127
127
  raise
128
128
 
129
- async def process(
130
- self,
131
- stream: AsyncStream[BetaRawMessageStreamEvent],
132
- ttft_span: Optional["Span"] = None,
133
- ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
134
- prev_message_type = None
135
- message_index = 0
136
- try:
137
- async with stream:
138
- async for event in stream:
139
- # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
140
- if isinstance(event, BetaRawContentBlockStartEvent):
141
- content = event.content_block
142
-
143
- if isinstance(content, BetaTextBlock):
144
- self.anthropic_mode = EventMode.TEXT
145
- # TODO: Can capture citations, etc.
146
- elif isinstance(content, BetaToolUseBlock):
147
- self.anthropic_mode = EventMode.TOOL_USE
148
- self.tool_call_id = content.id
149
- self.tool_call_name = content.name
150
- self.inner_thoughts_complete = False
151
-
152
- if not self.use_assistant_message:
153
- # Buffer the initial tool call message instead of yielding immediately
154
- tool_call_msg = ToolCallMessage(
155
- id=self.letta_message_id,
156
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
157
- date=datetime.now(timezone.utc).isoformat(),
158
- )
159
- self.tool_call_buffer.append(tool_call_msg)
160
- elif isinstance(content, BetaThinkingBlock):
161
- self.anthropic_mode = EventMode.THINKING
162
- # TODO: Can capture signature, etc.
163
- elif isinstance(content, BetaRedactedThinkingBlock):
164
- self.anthropic_mode = EventMode.REDACTED_THINKING
165
- if prev_message_type and prev_message_type != "hidden_reasoning_message":
166
- message_index += 1
167
- hidden_reasoning_message = HiddenReasoningMessage(
168
- id=self.letta_message_id,
169
- state="redacted",
170
- hidden_reasoning=content.data,
171
- date=datetime.now(timezone.utc).isoformat(),
172
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
173
- )
174
- self.reasoning_messages.append(hidden_reasoning_message)
175
- prev_message_type = hidden_reasoning_message.message_type
176
- yield hidden_reasoning_message
177
-
178
- elif isinstance(event, BetaRawContentBlockDeltaEvent):
179
- delta = event.delta
180
-
181
- if isinstance(delta, BetaTextDelta):
182
- # Safety check
183
- if not self.anthropic_mode == EventMode.TEXT:
184
- raise RuntimeError(
185
- f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
186
- )
187
-
188
- # Combine buffer with current text to handle tags split across chunks
189
- combined_text = self.partial_tag_buffer + delta.text
190
-
191
- # Remove all occurrences of </thinking> tag
192
- cleaned_text = combined_text.replace("</thinking>", "")
193
-
194
- # Extract just the new content (without the buffer part)
195
- if len(self.partial_tag_buffer) <= len(cleaned_text):
196
- delta.text = cleaned_text[len(self.partial_tag_buffer) :]
197
- else:
198
- # Edge case: the tag was removed and now the text is shorter than the buffer
199
- delta.text = ""
200
-
201
- # Store the last 10 characters (or all if less than 10) for the next chunk
202
- # This is enough to catch "</thinking" which is 10 characters
203
- self.partial_tag_buffer = combined_text[-10:] if len(combined_text) > 10 else combined_text
204
- self.accumulated_inner_thoughts.append(delta.text)
205
-
206
- if prev_message_type and prev_message_type != "reasoning_message":
207
- message_index += 1
208
- reasoning_message = ReasoningMessage(
209
- id=self.letta_message_id,
210
- reasoning=self.accumulated_inner_thoughts[-1],
211
- date=datetime.now(timezone.utc).isoformat(),
212
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
213
- )
214
- self.reasoning_messages.append(reasoning_message)
215
- prev_message_type = reasoning_message.message_type
216
- yield reasoning_message
217
-
218
- elif isinstance(delta, BetaInputJSONDelta):
219
- if not self.anthropic_mode == EventMode.TOOL_USE:
220
- raise RuntimeError(
221
- f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
222
- )
223
-
224
- self.accumulated_tool_call_args += delta.partial_json
225
- current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
226
-
227
- # Start detecting a difference in inner thoughts
228
- previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
229
- current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
230
- inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
231
-
232
- if inner_thoughts_diff:
233
- if prev_message_type and prev_message_type != "reasoning_message":
234
- message_index += 1
235
- reasoning_message = ReasoningMessage(
236
- id=self.letta_message_id,
237
- reasoning=inner_thoughts_diff,
238
- date=datetime.now(timezone.utc).isoformat(),
239
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
240
- )
241
- self.reasoning_messages.append(reasoning_message)
242
- prev_message_type = reasoning_message.message_type
243
- yield reasoning_message
244
-
245
- # Check if inner thoughts are complete - if so, flush the buffer
246
- if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
247
- self.inner_thoughts_complete = True
248
- # Flush all buffered tool call messages
249
- if len(self.tool_call_buffer) > 0:
250
- if prev_message_type and prev_message_type != "tool_call_message":
251
- message_index += 1
252
-
253
- # Strip out the inner thoughts from the buffered tool call arguments before streaming
254
- tool_call_args = ""
255
- for buffered_msg in self.tool_call_buffer:
256
- tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
257
- tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
258
-
259
- tool_call_msg = ToolCallMessage(
260
- id=self.tool_call_buffer[0].id,
261
- otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
262
- date=self.tool_call_buffer[0].date,
263
- name=self.tool_call_buffer[0].name,
264
- sender_id=self.tool_call_buffer[0].sender_id,
265
- step_id=self.tool_call_buffer[0].step_id,
266
- tool_call=ToolCallDelta(
267
- name=self.tool_call_name,
268
- tool_call_id=self.tool_call_id,
269
- arguments=tool_call_args,
270
- ),
271
- )
272
- prev_message_type = tool_call_msg.message_type
273
- yield tool_call_msg
274
- self.tool_call_buffer = []
275
-
276
- # Start detecting special case of "send_message"
277
- if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
278
- previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
279
- current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
280
- send_message_diff = current_send_message[len(previous_send_message) :]
281
-
282
- # Only stream out if it's not an empty string
283
- if send_message_diff:
284
- if prev_message_type and prev_message_type != "assistant_message":
285
- message_index += 1
286
- assistant_msg = AssistantMessage(
287
- id=self.letta_message_id,
288
- content=[TextContent(text=send_message_diff)],
289
- date=datetime.now(timezone.utc).isoformat(),
290
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
291
- )
292
- prev_message_type = assistant_msg.message_type
293
- yield assistant_msg
294
- else:
295
- # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
296
- tool_call_msg = ToolCallMessage(
297
- id=self.letta_message_id,
298
- tool_call=ToolCallDelta(
299
- name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
300
- ),
301
- date=datetime.now(timezone.utc).isoformat(),
302
- )
303
- if self.inner_thoughts_complete:
304
- if prev_message_type and prev_message_type != "tool_call_message":
305
- message_index += 1
306
- tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
307
- prev_message_type = tool_call_msg.message_type
308
- yield tool_call_msg
309
- else:
310
- self.tool_call_buffer.append(tool_call_msg)
311
-
312
- # Set previous parse
313
- self.previous_parse = current_parsed
314
- elif isinstance(delta, BetaThinkingDelta):
315
- # Safety check
316
- if not self.anthropic_mode == EventMode.THINKING:
317
- raise RuntimeError(
318
- f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
319
- )
320
-
321
- if prev_message_type and prev_message_type != "reasoning_message":
322
- message_index += 1
323
- reasoning_message = ReasoningMessage(
324
- id=self.letta_message_id,
325
- source="reasoner_model",
326
- reasoning=delta.thinking,
327
- date=datetime.now(timezone.utc).isoformat(),
328
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
329
- )
330
- self.reasoning_messages.append(reasoning_message)
331
- prev_message_type = reasoning_message.message_type
332
- yield reasoning_message
333
- elif isinstance(delta, BetaSignatureDelta):
334
- # Safety check
335
- if not self.anthropic_mode == EventMode.THINKING:
336
- raise RuntimeError(
337
- f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
338
- )
339
-
340
- if prev_message_type and prev_message_type != "reasoning_message":
341
- message_index += 1
342
- reasoning_message = ReasoningMessage(
343
- id=self.letta_message_id,
344
- source="reasoner_model",
345
- reasoning="",
346
- date=datetime.now(timezone.utc).isoformat(),
347
- signature=delta.signature,
348
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
349
- )
350
- self.reasoning_messages.append(reasoning_message)
351
- prev_message_type = reasoning_message.message_type
352
- yield reasoning_message
353
- elif isinstance(event, BetaRawMessageStartEvent):
354
- self.message_id = event.message.id
355
- self.input_tokens += event.message.usage.input_tokens
356
- self.output_tokens += event.message.usage.output_tokens
357
- self.model = event.message.model
358
- elif isinstance(event, BetaRawMessageDeltaEvent):
359
- self.output_tokens += event.usage.output_tokens
360
- elif isinstance(event, BetaRawMessageStopEvent):
361
- # Don't do anything here! We don't want to stop the stream.
362
- pass
363
- elif isinstance(event, BetaRawContentBlockStopEvent):
364
- # If we're exiting a tool use block and there are still buffered messages,
365
- # we should flush them now
366
- if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
367
- for buffered_msg in self.tool_call_buffer:
368
- yield buffered_msg
369
- self.tool_call_buffer = []
370
-
371
- self.anthropic_mode = None
372
- except asyncio.CancelledError as e:
373
- import traceback
374
-
375
- logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
376
- ttft_span.add_event(
377
- name="stop_reason",
378
- attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
379
- )
380
- raise e
381
- except Exception as e:
382
- import traceback
383
-
384
- logger.error("Error processing stream: %s", e, traceback.format_exc())
385
- ttft_span.add_event(
386
- name="stop_reason",
387
- attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
388
- )
389
- yield LettaStopReason(stop_reason=StopReasonType.error)
390
- raise e
391
- finally:
392
- logger.info("AnthropicStreamingInterface: Stream processing complete.")
393
-
394
129
  def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
395
130
  def _process_group(
396
131
  group: list[ReasoningMessage | HiddenReasoningMessage], group_type: str
@@ -445,3 +180,294 @@ class AnthropicStreamingInterface:
445
180
  content.text = content.text[:cutoff]
446
181
 
447
182
  return merged
183
+
184
+ async def process(
185
+ self,
186
+ stream: AsyncStream[BetaRawMessageStreamEvent],
187
+ ttft_span: Optional["Span"] = None,
188
+ ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
189
+ prev_message_type = None
190
+ message_index = 0
191
+ event = None
192
+ try:
193
+ async with stream:
194
+ async for event in stream:
195
+ try:
196
+ async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
197
+ new_message_type = message.message_type
198
+ if new_message_type != prev_message_type:
199
+ if prev_message_type != None:
200
+ message_index += 1
201
+ prev_message_type = new_message_type
202
+ yield message
203
+ except asyncio.CancelledError as e:
204
+ import traceback
205
+
206
+ logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
207
+ async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
208
+ new_message_type = message.message_type
209
+ if new_message_type != prev_message_type:
210
+ if prev_message_type != None:
211
+ message_index += 1
212
+ prev_message_type = new_message_type
213
+ yield message
214
+
215
+ # Don't raise the exception here
216
+ continue
217
+
218
+ except Exception as e:
219
+ import traceback
220
+
221
+ logger.error("Error processing stream: %s", e, traceback.format_exc())
222
+ ttft_span.add_event(
223
+ name="stop_reason",
224
+ attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
225
+ )
226
+ yield LettaStopReason(stop_reason=StopReasonType.error)
227
+ raise e
228
+ finally:
229
+ logger.info("AnthropicStreamingInterface: Stream processing complete.")
230
+
231
+ async def _process_event(
232
+ self,
233
+ event: BetaRawMessageStreamEvent,
234
+ ttft_span: Optional["Span"] = None,
235
+ prev_message_type: Optional[str] = None,
236
+ message_index: int = 0,
237
+ ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
238
+ """Process a single event from the Anthropic stream and yield any resulting messages.
239
+
240
+ Args:
241
+ event: The event to process
242
+
243
+ Yields:
244
+ Messages generated from processing this event
245
+ """
246
+ if isinstance(event, BetaRawContentBlockStartEvent):
247
+ content = event.content_block
248
+
249
+ if isinstance(content, BetaTextBlock):
250
+ self.anthropic_mode = EventMode.TEXT
251
+ # TODO: Can capture citations, etc.
252
+ elif isinstance(content, BetaToolUseBlock):
253
+ self.anthropic_mode = EventMode.TOOL_USE
254
+ self.tool_call_id = content.id
255
+ self.tool_call_name = content.name
256
+ self.inner_thoughts_complete = False
257
+
258
+ if not self.use_assistant_message:
259
+ # Buffer the initial tool call message instead of yielding immediately
260
+ tool_call_msg = ToolCallMessage(
261
+ id=self.letta_message_id,
262
+ tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
263
+ date=datetime.now(timezone.utc).isoformat(),
264
+ )
265
+ self.tool_call_buffer.append(tool_call_msg)
266
+ elif isinstance(content, BetaThinkingBlock):
267
+ self.anthropic_mode = EventMode.THINKING
268
+ # TODO: Can capture signature, etc.
269
+ elif isinstance(content, BetaRedactedThinkingBlock):
270
+ self.anthropic_mode = EventMode.REDACTED_THINKING
271
+ if prev_message_type and prev_message_type != "hidden_reasoning_message":
272
+ message_index += 1
273
+ hidden_reasoning_message = HiddenReasoningMessage(
274
+ id=self.letta_message_id,
275
+ state="redacted",
276
+ hidden_reasoning=content.data,
277
+ date=datetime.now(timezone.utc).isoformat(),
278
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
279
+ )
280
+ self.reasoning_messages.append(hidden_reasoning_message)
281
+ prev_message_type = hidden_reasoning_message.message_type
282
+ yield hidden_reasoning_message
283
+
284
+ elif isinstance(event, BetaRawContentBlockDeltaEvent):
285
+ delta = event.delta
286
+
287
+ if isinstance(delta, BetaTextDelta):
288
+ # Safety check
289
+ if not self.anthropic_mode == EventMode.TEXT:
290
+ raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")
291
+
292
+ # Combine buffer with current text to handle tags split across chunks
293
+ combined_text = self.partial_tag_buffer + delta.text
294
+
295
+ # Remove all occurrences of </thinking> tag
296
+ cleaned_text = combined_text.replace("</thinking>", "")
297
+
298
+ # Extract just the new content (without the buffer part)
299
+ if len(self.partial_tag_buffer) <= len(cleaned_text):
300
+ delta.text = cleaned_text[len(self.partial_tag_buffer) :]
301
+ else:
302
+ # Edge case: the tag was removed and now the text is shorter than the buffer
303
+ delta.text = ""
304
+
305
+ # Store the last 10 characters (or all if less than 10) for the next chunk
306
+ # This is enough to catch "</thinking" which is 10 characters
307
+ self.partial_tag_buffer = combined_text[-10:] if len(combined_text) > 10 else combined_text
308
+ self.accumulated_inner_thoughts.append(delta.text)
309
+
310
+ if prev_message_type and prev_message_type != "reasoning_message":
311
+ message_index += 1
312
+ reasoning_message = ReasoningMessage(
313
+ id=self.letta_message_id,
314
+ reasoning=self.accumulated_inner_thoughts[-1],
315
+ date=datetime.now(timezone.utc).isoformat(),
316
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
317
+ )
318
+ self.reasoning_messages.append(reasoning_message)
319
+ prev_message_type = reasoning_message.message_type
320
+ yield reasoning_message
321
+
322
+ elif isinstance(delta, BetaInputJSONDelta):
323
+ if not self.anthropic_mode == EventMode.TOOL_USE:
324
+ raise RuntimeError(
325
+ f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
326
+ )
327
+
328
+ self.accumulated_tool_call_args += delta.partial_json
329
+ current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
330
+
331
+ # Start detecting a difference in inner thoughts
332
+ previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
333
+ current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
334
+ inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
335
+
336
+ if inner_thoughts_diff:
337
+ if prev_message_type and prev_message_type != "reasoning_message":
338
+ message_index += 1
339
+ reasoning_message = ReasoningMessage(
340
+ id=self.letta_message_id,
341
+ reasoning=inner_thoughts_diff,
342
+ date=datetime.now(timezone.utc).isoformat(),
343
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
344
+ )
345
+ self.reasoning_messages.append(reasoning_message)
346
+ prev_message_type = reasoning_message.message_type
347
+ yield reasoning_message
348
+
349
+ # Check if inner thoughts are complete - if so, flush the buffer
350
+ if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
351
+ self.inner_thoughts_complete = True
352
+ # Flush all buffered tool call messages
353
+ if len(self.tool_call_buffer) > 0:
354
+ if prev_message_type and prev_message_type != "tool_call_message":
355
+ message_index += 1
356
+
357
+ # Strip out the inner thoughts from the buffered tool call arguments before streaming
358
+ tool_call_args = ""
359
+ for buffered_msg in self.tool_call_buffer:
360
+ tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
361
+ tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
362
+
363
+ tool_call_msg = ToolCallMessage(
364
+ id=self.tool_call_buffer[0].id,
365
+ otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
366
+ date=self.tool_call_buffer[0].date,
367
+ name=self.tool_call_buffer[0].name,
368
+ sender_id=self.tool_call_buffer[0].sender_id,
369
+ step_id=self.tool_call_buffer[0].step_id,
370
+ tool_call=ToolCallDelta(
371
+ name=self.tool_call_name,
372
+ tool_call_id=self.tool_call_id,
373
+ arguments=tool_call_args,
374
+ ),
375
+ )
376
+ prev_message_type = tool_call_msg.message_type
377
+ yield tool_call_msg
378
+ self.tool_call_buffer = []
379
+
380
+ # Start detecting special case of "send_message"
381
+ if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
382
+ previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
383
+ current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
384
+ send_message_diff = current_send_message[len(previous_send_message) :]
385
+
386
+ # Only stream out if it's not an empty string
387
+ if send_message_diff:
388
+ if prev_message_type and prev_message_type != "assistant_message":
389
+ message_index += 1
390
+ assistant_msg = AssistantMessage(
391
+ id=self.letta_message_id,
392
+ content=[TextContent(text=send_message_diff)],
393
+ date=datetime.now(timezone.utc).isoformat(),
394
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
395
+ )
396
+ prev_message_type = assistant_msg.message_type
397
+ yield assistant_msg
398
+ else:
399
+ # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
400
+ tool_call_msg = ToolCallMessage(
401
+ id=self.letta_message_id,
402
+ tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
403
+ date=datetime.now(timezone.utc).isoformat(),
404
+ )
405
+ if self.inner_thoughts_complete:
406
+ if prev_message_type and prev_message_type != "tool_call_message":
407
+ message_index += 1
408
+ tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
409
+ prev_message_type = tool_call_msg.message_type
410
+ yield tool_call_msg
411
+ else:
412
+ self.tool_call_buffer.append(tool_call_msg)
413
+
414
+ # Set previous parse
415
+ self.previous_parse = current_parsed
416
+ elif isinstance(delta, BetaThinkingDelta):
417
+ # Safety check
418
+ if not self.anthropic_mode == EventMode.THINKING:
419
+ raise RuntimeError(
420
+ f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
421
+ )
422
+
423
+ if prev_message_type and prev_message_type != "reasoning_message":
424
+ message_index += 1
425
+ reasoning_message = ReasoningMessage(
426
+ id=self.letta_message_id,
427
+ source="reasoner_model",
428
+ reasoning=delta.thinking,
429
+ date=datetime.now(timezone.utc).isoformat(),
430
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
431
+ )
432
+ self.reasoning_messages.append(reasoning_message)
433
+ prev_message_type = reasoning_message.message_type
434
+ yield reasoning_message
435
+ elif isinstance(delta, BetaSignatureDelta):
436
+ # Safety check
437
+ if not self.anthropic_mode == EventMode.THINKING:
438
+ raise RuntimeError(
439
+ f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
440
+ )
441
+
442
+ if prev_message_type and prev_message_type != "reasoning_message":
443
+ message_index += 1
444
+ reasoning_message = ReasoningMessage(
445
+ id=self.letta_message_id,
446
+ source="reasoner_model",
447
+ reasoning="",
448
+ date=datetime.now(timezone.utc).isoformat(),
449
+ signature=delta.signature,
450
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
451
+ )
452
+ self.reasoning_messages.append(reasoning_message)
453
+ prev_message_type = reasoning_message.message_type
454
+ yield reasoning_message
455
+ elif isinstance(event, BetaRawMessageStartEvent):
456
+ self.message_id = event.message.id
457
+ self.input_tokens += event.message.usage.input_tokens
458
+ self.output_tokens += event.message.usage.output_tokens
459
+ self.model = event.message.model
460
+ elif isinstance(event, BetaRawMessageDeltaEvent):
461
+ self.output_tokens += event.usage.output_tokens
462
+ elif isinstance(event, BetaRawMessageStopEvent):
463
+ # Don't do anything here! We don't want to stop the stream.
464
+ pass
465
+ elif isinstance(event, BetaRawContentBlockStopEvent):
466
+ # If we're exiting a tool use block and there are still buffered messages,
467
+ # we should flush them now
468
+ if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
469
+ for buffered_msg in self.tool_call_buffer:
470
+ yield buffered_msg
471
+ self.tool_call_buffer = []
472
+
473
+ self.anthropic_mode = None