letta-nightly 0.12.0.dev20251009104148__py3-none-any.whl → 0.12.1.dev20251009224219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
letta/__init__.py CHANGED
@@ -5,7 +5,7 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.12.0"
+    __version__ = "0.12.1"
 
 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]
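For context, the hunk above implements a three-tier version resolution: installed distribution metadata first, a hardcoded fallback for development checkouts, then a `LETTA_VERSION` environment override that wins over both. A minimal self-contained sketch of the same pattern:

```python
import os
from importlib.metadata import PackageNotFoundError, version

try:
    # Preferred source: metadata of the installed distribution
    __version__ = version("letta")
except PackageNotFoundError:
    # Fallback for development installations (the literal bumped by this release)
    __version__ = "0.12.1"

# An explicit environment override takes precedence over both sources
if os.environ.get("LETTA_VERSION"):
    __version__ = os.environ["LETTA_VERSION"]
```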
@@ -2,7 +2,7 @@ from typing import AsyncGenerator, List
 
 from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns
-from letta.interfaces.anthropic_streaming_interface import SimpleAnthropicStreamingInterface
+from letta.interfaces.anthropic_parallel_tool_call_streaming_interface import SimpleAnthropicStreamingInterface
 from letta.interfaces.gemini_streaming_interface import SimpleGeminiStreamingInterface
 from letta.interfaces.openai_streaming_interface import SimpleOpenAIResponsesStreamingInterface, SimpleOpenAIStreamingInterface
 from letta.schemas.enums import ProviderType
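Because the new module exports a class under the same name, the swap is a drop-in replacement: every call site that referenced `SimpleAnthropicStreamingInterface` keeps working unchanged. A hedged sketch of what a consumer sees (constructor parameters taken from the new interface later in this diff):

```python
# Same symbol, new module path -- downstream code is untouched by the swap.
from letta.interfaces.anthropic_parallel_tool_call_streaming_interface import (
    SimpleAnthropicStreamingInterface,
)

interface = SimpleAnthropicStreamingInterface(
    requires_approval_tools=[],  # tools that should yield ApprovalRequestMessage
    run_id=None,
    step_id=None,
)
```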
@@ -99,16 +99,16 @@ class LettaAgentV2(BaseAgentV2):
         self.step_manager = StepManager()
         self.telemetry_manager = TelemetryManager()
 
-        # TODO: Expand to more
-        if summarizer_settings.enable_summarization and model_settings.openai_api_key:
-            self.summarization_agent = EphemeralSummaryAgent(
-                target_block_label="conversation_summary",
-                agent_id=self.agent_state.id,
-                block_manager=self.block_manager,
-                message_manager=self.message_manager,
-                agent_manager=self.agent_manager,
-                actor=self.actor,
-            )
+        ## TODO: Expand to more
+        # if summarizer_settings.enable_summarization and model_settings.openai_api_key:
+        #     self.summarization_agent = EphemeralSummaryAgent(
+        #         target_block_label="conversation_summary",
+        #         agent_id=self.agent_state.id,
+        #         block_manager=self.block_manager,
+        #         message_manager=self.message_manager,
+        #         agent_manager=self.agent_manager,
+        #         actor=self.actor,
+        #     )
 
         # Initialize summarizer for context window management
         self.summarizer = Summarizer(
@@ -117,7 +117,7 @@ class LettaAgentV2(BaseAgentV2):
                 if self.agent_state.agent_type == AgentType.voice_convo_agent
                 else summarizer_settings.mode
             ),
-            summarizer_agent=self.summarization_agent,
+            summarizer_agent=None,  # self.summarization_agent,
             message_buffer_limit=summarizer_settings.message_buffer_limit,
             message_buffer_min=summarizer_settings.message_buffer_min,
             partial_evict_summarizer_percentage=summarizer_settings.partial_evict_summarizer_percentage,
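With the `EphemeralSummaryAgent` wiring commented out, `summarizer_agent` is now always `None`, so anything that previously delegated to that agent must tolerate its absence. A hedged illustration of the guard such a consumer needs (the class below is illustrative only, not Letta's actual `Summarizer` internals):

```python
from typing import Any, Optional

class NoneTolerantSummarizer:
    """Illustrative sketch: degrade gracefully when no summary agent is configured."""

    def __init__(self, summarizer_agent: Optional[Any] = None, message_buffer_limit: int = 100):
        self.summarizer_agent = summarizer_agent
        self.message_buffer_limit = message_buffer_limit

    async def summarize(self, messages: list) -> list:
        if self.summarizer_agent is None:
            # No agent available: fall back to plain buffer eviction
            return messages[-self.message_buffer_limit:]
        # Agent-backed path (hypothetical method name)
        return await self.summarizer_agent.summarize(messages)
```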
@@ -0,0 +1,487 @@
+import asyncio
+import json
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+
+from anthropic import AsyncStream
+from anthropic.types.beta import (
+    BetaInputJSONDelta,
+    BetaRawContentBlockDeltaEvent,
+    BetaRawContentBlockStartEvent,
+    BetaRawContentBlockStopEvent,
+    BetaRawMessageDeltaEvent,
+    BetaRawMessageStartEvent,
+    BetaRawMessageStopEvent,
+    BetaRawMessageStreamEvent,
+    BetaRedactedThinkingBlock,
+    BetaSignatureDelta,
+    BetaTextBlock,
+    BetaTextDelta,
+    BetaThinkingBlock,
+    BetaThinkingDelta,
+    BetaToolUseBlock,
+)
+
+from letta.log import get_logger
+from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
+    AssistantMessage,
+    HiddenReasoningMessage,
+    LettaMessage,
+    ReasoningMessage,
+    ToolCallDelta,
+    ToolCallMessage,
+)
+from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
+from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
+from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
+
+logger = get_logger(__name__)
+
+
+# TODO: These modes aren't used right now - but can be useful we do multiple sequential tool calling within one Claude message
+class EventMode(Enum):
+    TEXT = "TEXT"
+    TOOL_USE = "TOOL_USE"
+    THINKING = "THINKING"
+    REDACTED_THINKING = "REDACTED_THINKING"
+
+
+# TODO: There's a duplicate version of this in anthropic_streaming_interface
+class SimpleAnthropicStreamingInterface:
+    """
+    A simpler version of AnthropicStreamingInterface focused on streaming assistant text and
+    tool call deltas. Updated to support parallel tool calling by collecting completed
+    ToolUse blocks (from content_block stop events) and exposing all finalized tool calls
+    via get_tool_call_objects().
+
+    Notes:
+    - We keep emitting the stream (text and tool-call deltas) as before for latency.
+    - We no longer rely on accumulating partial JSON to build the final tool call; instead
+      we read the finalized ToolUse input from the stop event and store it.
+    - Multiple tool calls within a single message (parallel tool use) are collected and
+      can be returned to the agent as a list.
+    """
+
+    def __init__(
+        self,
+        requires_approval_tools: list = [],
+        run_id: str | None = None,
+        step_id: str | None = None,
+    ):
+        self.json_parser: JSONParser = PydanticJSONParser()
+        self.run_id = run_id
+        self.step_id = step_id
+
+        # Premake IDs for database writes
+        self.letta_message_id = Message.generate_id()
+
+        self.anthropic_mode = None
+        self.message_id = None
+        self.accumulated_inner_thoughts = []
+        self.tool_call_id = None
+        self.tool_call_name = None
+        self.accumulated_tool_call_args = ""
+        self.previous_parse = {}
+
+        # usage trackers
+        self.input_tokens = 0
+        self.output_tokens = 0
+        self.model = None
+
+        # reasoning object trackers
+        self.reasoning_messages = []
+
+        # assistant object trackers
+        self.assistant_messages: list[AssistantMessage] = []
+
+        # Buffer to hold tool call messages until inner thoughts are complete
+        self.tool_call_buffer = []
+        self.inner_thoughts_complete = False
+
+        # Buffer to handle partial XML tags across chunks
+        self.partial_tag_buffer = ""
+
+        self.requires_approval_tools = requires_approval_tools
+        # Collected finalized tool calls (supports parallel tool use)
+        self.collected_tool_calls: list[ToolCall] = []
+        # Track active tool_use blocks by stream index for parallel tool calling
+        # { index: {"id": str, "name": str, "args": str} }
+        self.active_tool_uses: dict[int, dict[str, str]] = {}
+        # Maintain start order and indexed collection for stable ordering
+        self._tool_use_start_order: list[int] = []
+        self._collected_indexed: list[tuple[int, ToolCall]] = []
+
+    def get_tool_call_objects(self) -> list[ToolCall]:
+        """Return all finalized tool calls collected during this message (parallel supported)."""
+        # Prefer indexed ordering if available
+        if self._collected_indexed:
+            return [
+                call
+                for _, call in sorted(
+                    self._collected_indexed,
+                    key=lambda x: self._tool_use_start_order.index(x[0]) if x[0] in self._tool_use_start_order else x[0],
+                )
+            ]
+        return self.collected_tool_calls
+
+    # This exists for legacy compatibility
+    def get_tool_call_object(self) -> Optional[ToolCall]:
+        tool_calls = self.get_tool_call_objects()
+        if tool_calls:
+            return tool_calls[0]
+        return None
+
+    def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
+        def _process_group(
+            group: list[ReasoningMessage | HiddenReasoningMessage | AssistantMessage],
+            group_type: str,
+        ) -> TextContent | ReasoningContent | RedactedReasoningContent:
+            if group_type == "reasoning":
+                reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
+                is_native = any(chunk.source == "reasoner_model" for chunk in group)
+                signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
+                if is_native:
+                    return ReasoningContent(is_native=is_native, reasoning=reasoning_text, signature=signature)
+                else:
+                    return TextContent(text=reasoning_text)
+            elif group_type == "redacted":
+                redacted_text = "".join(chunk.hidden_reasoning for chunk in group if chunk.hidden_reasoning is not None)
+                return RedactedReasoningContent(data=redacted_text)
+            elif group_type == "text":
+                concat = ""
+                for chunk in group:
+                    if isinstance(chunk.content, list):
+                        concat += "".join([c.text for c in chunk.content])
+                    else:
+                        concat += chunk.content
+                return TextContent(text=concat)
+            else:
+                raise ValueError("Unexpected group type")
+
+        merged = []
+        current_group = []
+        current_group_type = None  # "reasoning" or "redacted"
+
+        for msg in self.reasoning_messages:
+            # Determine the type of the current message
+            if isinstance(msg, HiddenReasoningMessage):
+                msg_type = "redacted"
+            elif isinstance(msg, ReasoningMessage):
+                msg_type = "reasoning"
+            elif isinstance(msg, AssistantMessage):
+                msg_type = "text"
+            else:
+                raise ValueError("Unexpected message type")
+
+            # Initialize group type if not set
+            if current_group_type is None:
+                current_group_type = msg_type
+
+            # If the type changes, process the current group
+            if msg_type != current_group_type:
+                merged.append(_process_group(current_group, current_group_type))
+                current_group = []
+                current_group_type = msg_type
+
+            current_group.append(msg)
+
+        # Process the final group, if any.
+        if current_group:
+            merged.append(_process_group(current_group, current_group_type))
+
+        return merged
+
+    def get_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
+        return self.get_reasoning_content()
+
+    async def process(
+        self,
+        stream: AsyncStream[BetaRawMessageStreamEvent],
+        ttft_span: Optional["Span"] = None,
+    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
+        prev_message_type = None
+        message_index = 0
+        event = None
+        try:
+            async with stream:
+                async for event in stream:
+                    try:
+                        async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            # print(f"Yielding message: {message}")
+                            yield message
+                    except asyncio.CancelledError as e:
+                        import traceback
+
+                        logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
+                        async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
+                            new_message_type = message.message_type
+                            if new_message_type != prev_message_type:
+                                if prev_message_type != None:
+                                    message_index += 1
+                                prev_message_type = new_message_type
+                            yield message
+
+                        # Don't raise the exception here
+                        continue
+
+        except Exception as e:
+            import traceback
+
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
+            yield LettaStopReason(stop_reason=StopReasonType.error)
+            raise e
+        finally:
+            logger.info("AnthropicStreamingInterface: Stream processing complete.")
+
+    async def _process_event(
+        self,
+        event: BetaRawMessageStreamEvent,
+        ttft_span: Optional["Span"] = None,
+        prev_message_type: Optional[str] = None,
+        message_index: int = 0,
+    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
+        """Process a single event from the Anthropic stream and yield any resulting messages.
+
+        Args:
+            event: The event to process
+
+        Yields:
+            Messages generated from processing this event
+        """
+        if isinstance(event, BetaRawContentBlockStartEvent):
+            content = event.content_block
+
+            if isinstance(content, BetaTextBlock):
+                self.anthropic_mode = EventMode.TEXT
+                # TODO: Can capture citations, etc.
+
+            elif isinstance(content, BetaToolUseBlock):
+                # New tool_use block started at this index
+                self.anthropic_mode = EventMode.TOOL_USE
+                self.active_tool_uses[event.index] = {"id": content.id, "name": content.name, "args": ""}
+                if event.index not in self._tool_use_start_order:
+                    self._tool_use_start_order.append(event.index)
+
+                # Emit an initial tool call delta for this new block
+                name = content.name
+                call_id = content.id
+                # Initialize arguments from the start event's input (often {}) to avoid undefined in UIs
+                if name in self.requires_approval_tools:
+                    if prev_message_type and prev_message_type != "approval_request_message":
+                        message_index += 1
+                    tool_call_msg = ApprovalRequestMessage(
+                        id=self.letta_message_id,
+                        # Do not emit placeholder arguments here to avoid UI duplicates
+                        tool_call=ToolCallDelta(name=name, tool_call_id=call_id),
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                else:
+                    if prev_message_type and prev_message_type != "tool_call_message":
+                        message_index += 1
+                    tool_call_msg = ToolCallMessage(
+                        id=self.letta_message_id,
+                        # Do not emit placeholder arguments here to avoid UI duplicates
+                        tool_call=ToolCallDelta(name=name, tool_call_id=call_id),
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                prev_message_type = tool_call_msg.message_type
+                yield tool_call_msg
+
+            elif isinstance(content, BetaThinkingBlock):
+                self.anthropic_mode = EventMode.THINKING
+                # TODO: Can capture signature, etc.
+
+            elif isinstance(content, BetaRedactedThinkingBlock):
+                self.anthropic_mode = EventMode.REDACTED_THINKING
+
+                if prev_message_type and prev_message_type != "hidden_reasoning_message":
+                    message_index += 1
+
+                hidden_reasoning_message = HiddenReasoningMessage(
+                    id=self.letta_message_id,
+                    state="redacted",
+                    hidden_reasoning=content.data,
+                    date=datetime.now(timezone.utc).isoformat(),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                    run_id=self.run_id,
+                    step_id=self.step_id,
+                )
+
+                self.reasoning_messages.append(hidden_reasoning_message)
+                prev_message_type = hidden_reasoning_message.message_type
+                yield hidden_reasoning_message
+
+        elif isinstance(event, BetaRawContentBlockDeltaEvent):
+            delta = event.delta
+
+            if isinstance(delta, BetaTextDelta):
+                # Safety check
+                if not self.anthropic_mode == EventMode.TEXT:
+                    raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")
+
+                if prev_message_type and prev_message_type != "assistant_message":
+                    message_index += 1
+
+                assistant_msg = AssistantMessage(
+                    id=self.letta_message_id,
+                    # content=[TextContent(text=delta.text)],
+                    content=delta.text,
+                    date=datetime.now(timezone.utc).isoformat(),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                    run_id=self.run_id,
+                    step_id=self.step_id,
+                )
+                # self.assistant_messages.append(assistant_msg)
+                self.reasoning_messages.append(assistant_msg)
+                prev_message_type = assistant_msg.message_type
+                yield assistant_msg
+
+            elif isinstance(delta, BetaInputJSONDelta):
+                # Append partial JSON for the specific tool_use block at this index
+                if not self.anthropic_mode == EventMode.TOOL_USE:
+                    raise RuntimeError(
+                        f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
+                    )
+
+                ctx = self.active_tool_uses.get(event.index)
+                if ctx is None:
+                    # Defensive: initialize if missing
+                    self.active_tool_uses[event.index] = {"id": self.tool_call_id or "", "name": self.tool_call_name or "", "args": ""}
+                    ctx = self.active_tool_uses[event.index]
+
+                # Append only non-empty partials
+                if delta.partial_json:
+                    ctx["args"] += delta.partial_json
+                else:
+                    # Skip streaming a no-op delta to prevent duplicate placeholders in UI
+                    return
+
+                name = ctx.get("name")
+                call_id = ctx.get("id")
+
+                if name in self.requires_approval_tools:
+                    if prev_message_type and prev_message_type != "approval_request_message":
+                        message_index += 1
+                    tool_call_msg = ApprovalRequestMessage(
+                        id=self.letta_message_id,
+                        tool_call=ToolCallDelta(name=name, tool_call_id=call_id, arguments=delta.partial_json),
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                else:
+                    if prev_message_type and prev_message_type != "tool_call_message":
+                        message_index += 1
+                    tool_call_msg = ToolCallMessage(
+                        id=self.letta_message_id,
+                        tool_call=ToolCallDelta(name=name, tool_call_id=call_id, arguments=delta.partial_json),
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+
+                yield tool_call_msg
+
+            elif isinstance(delta, BetaThinkingDelta):
+                # Safety check
+                if not self.anthropic_mode == EventMode.THINKING:
+                    raise RuntimeError(
+                        f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
+                    )
+
+                if prev_message_type and prev_message_type != "reasoning_message":
+                    message_index += 1
+                reasoning_message = ReasoningMessage(
+                    id=self.letta_message_id,
+                    source="reasoner_model",
+                    reasoning=delta.thinking,
+                    date=datetime.now(timezone.utc).isoformat(),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                    run_id=self.run_id,
+                    step_id=self.step_id,
+                )
+                self.reasoning_messages.append(reasoning_message)
+                prev_message_type = reasoning_message.message_type
+                yield reasoning_message
+
+            elif isinstance(delta, BetaSignatureDelta):
+                # Safety check
+                if not self.anthropic_mode == EventMode.THINKING:
+                    raise RuntimeError(
+                        f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
+                    )
+
+                if prev_message_type and prev_message_type != "reasoning_message":
+                    message_index += 1
+                reasoning_message = ReasoningMessage(
+                    id=self.letta_message_id,
+                    source="reasoner_model",
+                    reasoning="",
+                    date=datetime.now(timezone.utc).isoformat(),
+                    signature=delta.signature,
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                    run_id=self.run_id,
+                    step_id=self.step_id,
+                )
+                self.reasoning_messages.append(reasoning_message)
+                prev_message_type = reasoning_message.message_type
+                yield reasoning_message
+
+        elif isinstance(event, BetaRawMessageStartEvent):
+            self.message_id = event.message.id
+            self.input_tokens += event.message.usage.input_tokens
+            self.output_tokens += event.message.usage.output_tokens
+            self.model = event.message.model
+
+        elif isinstance(event, BetaRawMessageDeltaEvent):
+            self.output_tokens += event.usage.output_tokens
+
+        elif isinstance(event, BetaRawMessageStopEvent):
+            # Don't do anything here! We don't want to stop the stream.
+            pass
+
+        elif isinstance(event, BetaRawContentBlockStopEvent):
+            # Finalize the tool_use block at this index using accumulated deltas
+            ctx = self.active_tool_uses.pop(event.index, None)
+            if ctx is not None and ctx.get("id") and ctx.get("name") is not None:
+                raw_args = ctx.get("args", "")
+                try:
+                    # Prefer strict JSON load, fallback to permissive parser
+                    tool_input = json.loads(raw_args) if raw_args else {}
+                except json.JSONDecodeError:
+                    try:
+                        tool_input = self.json_parser.parse(raw_args) if raw_args else {}
+                    except Exception:
+                        tool_input = {}
+
+                arguments = json.dumps(tool_input)
+                finalized = ToolCall(id=ctx["id"], function=FunctionCall(arguments=arguments, name=ctx["name"]))
+                # Keep both raw list and indexed list for compatibility
+                self.collected_tool_calls.append(finalized)
+                self._collected_indexed.append((event.index, finalized))
+
+            # Reset mode when a content block ends
+            self.anthropic_mode = None
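To make the new interface concrete, here is a hedged usage sketch: feed the raw Anthropic event stream through `process(...)`, forward each yielded `LettaMessage`, then read every finalized (possibly parallel) tool call. The Anthropic client call and `request_params` are illustrative; only the interface methods shown above are assumed.

```python
import anthropic

async def run_turn(request_params: dict) -> None:
    client = anthropic.AsyncAnthropic()
    interface = SimpleAnthropicStreamingInterface(requires_approval_tools=[])

    # With stream=True the beta messages API returns the raw
    # BetaRawMessageStreamEvent objects that process() consumes.
    stream = await client.beta.messages.create(**request_params, stream=True)

    async for letta_message in interface.process(stream):
        ...  # forward text/tool-call deltas to the client UI as they arrive

    # After the stream ends: every finalized tool_use block, in start order.
    for call in interface.get_tool_call_objects():
        print(call.id, call.function.name, call.function.arguments)
```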
@@ -279,9 +279,11 @@ class AnthropicStreamingInterface:
                 if prev_message_type and prev_message_type != "tool_call_message":
                     message_index += 1
                 if self.tool_call_name not in self.requires_approval_tools:
+                    tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
                     tool_call_msg = ToolCallMessage(
                         id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                        tool_call=tool_call_delta,
+                        tool_calls=tool_call_delta,
                         date=datetime.now(timezone.utc).isoformat(),
                         otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         run_id=self.run_id,
@@ -423,15 +425,17 @@ class AnthropicStreamingInterface:
                 tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
             tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
 
+            tool_call_delta = ToolCallDelta(
+                name=self.tool_call_name,
+                tool_call_id=self.tool_call_id,
+                arguments=tool_call_args,
+            )
             tool_call_msg = ToolCallMessage(
                 id=self.tool_call_buffer[0].id,
                 otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
                 date=self.tool_call_buffer[0].date,
-                tool_call=ToolCallDelta(
-                    name=self.tool_call_name,
-                    tool_call_id=self.tool_call_id,
-                    arguments=tool_call_args,
-                ),
+                tool_call=tool_call_delta,
+                tool_calls=tool_call_delta,
                 run_id=self.run_id,
             )
             prev_message_type = tool_call_msg.message_type
@@ -467,9 +471,13 @@ class AnthropicStreamingInterface:
                         run_id=self.run_id,
                     )
                 else:
+                    tool_call_delta = ToolCallDelta(
+                        name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
+                    )
                     tool_call_msg = ToolCallMessage(
                         id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                        tool_call=tool_call_delta,
+                        tool_calls=tool_call_delta,
                         date=datetime.now(timezone.utc).isoformat(),
                         run_id=self.run_id,
                     )
@@ -778,9 +786,11 @@ class SimpleAnthropicStreamingInterface:
                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+                    tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
                     tool_call_msg = ToolCallMessage(
                         id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                        tool_call=tool_call_delta,
+                        tool_calls=tool_call_delta,
                         date=datetime.now(timezone.utc).isoformat(),
                         otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         run_id=self.run_id,
@@ -860,9 +870,11 @@ class SimpleAnthropicStreamingInterface:
                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+                    tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json)
                     tool_call_msg = ToolCallMessage(
                         id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                        tool_call=tool_call_delta,
+                        tool_calls=tool_call_delta,
                         date=datetime.now(timezone.utc).isoformat(),
                         otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         run_id=self.run_id,
@@ -273,15 +273,17 @@ class SimpleGeminiStreamingInterface:
                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+                    tool_call_delta = ToolCallDelta(
+                        name=name,
+                        arguments=arguments_str,
+                        tool_call_id=call_id,
+                    )
                     yield ToolCallMessage(
                         id=self.letta_message_id,
                         otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         date=datetime.now(timezone.utc),
-                        tool_call=ToolCallDelta(
-                            name=name,
-                            arguments=arguments_str,
-                            tool_call_id=call_id,
-                        ),
+                        tool_call=tool_call_delta,
+                        tool_calls=tool_call_delta,
                         run_id=self.run_id,
                         step_id=self.step_id,
                     )
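The hunks above all apply one pattern: build the `ToolCallDelta` once, then assign it to both the legacy singular `tool_call` field and the newer `tool_calls` field of the outgoing message. A hedged distillation of that pattern (field names as shown in the hunks; surrounding values illustrative):

```python
from datetime import datetime, timezone

tool_call_delta = ToolCallDelta(
    name=tool_call_name,
    tool_call_id=tool_call_id,
    arguments=partial_json,  # omitted in the initial name/id-only delta
)
tool_call_msg = ToolCallMessage(
    id=letta_message_id,
    tool_call=tool_call_delta,   # legacy singular field, kept for compatibility
    tool_calls=tool_call_delta,  # newer field populated with the same delta
    date=datetime.now(timezone.utc).isoformat(),
    run_id=run_id,
)
```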