letta-nightly 0.12.0.dev20251009104148__py3-none-any.whl → 0.12.1.dev20251009224219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/adapters/simple_llm_stream_adapter.py +1 -1
- letta/agents/letta_agent_v2.py +11 -11
- letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +487 -0
- letta/interfaces/anthropic_streaming_interface.py +21 -9
- letta/interfaces/gemini_streaming_interface.py +7 -5
- letta/interfaces/openai_streaming_interface.py +42 -30
- letta/llm_api/anthropic_client.py +35 -16
- letta/llm_api/openai_client.py +11 -0
- letta/schemas/environment_variables.py +24 -0
- letta/schemas/letta_message.py +29 -12
- letta/schemas/message.py +102 -21
- letta/schemas/providers/base.py +43 -0
- letta/schemas/secret.py +103 -36
- letta/server/rest_api/interface.py +85 -41
- letta/server/rest_api/routers/v1/providers.py +34 -0
- letta/server/rest_api/routers/v1/runs.py +1 -1
- letta/server/server.py +22 -0
- letta/settings.py +3 -0
- {letta_nightly-0.12.0.dev20251009104148.dist-info → letta_nightly-0.12.1.dev20251009224219.dist-info}/METADATA +1 -1
- {letta_nightly-0.12.0.dev20251009104148.dist-info → letta_nightly-0.12.1.dev20251009224219.dist-info}/RECORD +24 -23
- {letta_nightly-0.12.0.dev20251009104148.dist-info → letta_nightly-0.12.1.dev20251009224219.dist-info}/WHEEL +0 -0
- {letta_nightly-0.12.0.dev20251009104148.dist-info → letta_nightly-0.12.1.dev20251009224219.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.12.0.dev20251009104148.dist-info → letta_nightly-0.12.1.dev20251009224219.dist-info}/licenses/LICENSE +0 -0
letta/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from typing import AsyncGenerator, List
|
|
2
2
|
|
3
3
|
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
4
4
|
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
|
5
|
-
from letta.interfaces.
|
5
|
+
from letta.interfaces.anthropic_parallel_tool_call_streaming_interface import SimpleAnthropicStreamingInterface
|
6
6
|
from letta.interfaces.gemini_streaming_interface import SimpleGeminiStreamingInterface
|
7
7
|
from letta.interfaces.openai_streaming_interface import SimpleOpenAIResponsesStreamingInterface, SimpleOpenAIStreamingInterface
|
8
8
|
from letta.schemas.enums import ProviderType
|
letta/agents/letta_agent_v2.py
CHANGED
@@ -99,16 +99,16 @@ class LettaAgentV2(BaseAgentV2):
|
|
99
99
|
self.step_manager = StepManager()
|
100
100
|
self.telemetry_manager = TelemetryManager()
|
101
101
|
|
102
|
-
|
103
|
-
if summarizer_settings.enable_summarization and model_settings.openai_api_key:
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
102
|
+
## TODO: Expand to more
|
103
|
+
# if summarizer_settings.enable_summarization and model_settings.openai_api_key:
|
104
|
+
# self.summarization_agent = EphemeralSummaryAgent(
|
105
|
+
# target_block_label="conversation_summary",
|
106
|
+
# agent_id=self.agent_state.id,
|
107
|
+
# block_manager=self.block_manager,
|
108
|
+
# message_manager=self.message_manager,
|
109
|
+
# agent_manager=self.agent_manager,
|
110
|
+
# actor=self.actor,
|
111
|
+
# )
|
112
112
|
|
113
113
|
# Initialize summarizer for context window management
|
114
114
|
self.summarizer = Summarizer(
|
@@ -117,7 +117,7 @@ class LettaAgentV2(BaseAgentV2):
|
|
117
117
|
if self.agent_state.agent_type == AgentType.voice_convo_agent
|
118
118
|
else summarizer_settings.mode
|
119
119
|
),
|
120
|
-
summarizer_agent=self.summarization_agent,
|
120
|
+
summarizer_agent=None, # self.summarization_agent,
|
121
121
|
message_buffer_limit=summarizer_settings.message_buffer_limit,
|
122
122
|
message_buffer_min=summarizer_settings.message_buffer_min,
|
123
123
|
partial_evict_summarizer_percentage=summarizer_settings.partial_evict_summarizer_percentage,
|
@@ -0,0 +1,487 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
from collections.abc import AsyncGenerator
|
4
|
+
from datetime import datetime, timezone
|
5
|
+
from enum import Enum
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from anthropic import AsyncStream
|
9
|
+
from anthropic.types.beta import (
|
10
|
+
BetaInputJSONDelta,
|
11
|
+
BetaRawContentBlockDeltaEvent,
|
12
|
+
BetaRawContentBlockStartEvent,
|
13
|
+
BetaRawContentBlockStopEvent,
|
14
|
+
BetaRawMessageDeltaEvent,
|
15
|
+
BetaRawMessageStartEvent,
|
16
|
+
BetaRawMessageStopEvent,
|
17
|
+
BetaRawMessageStreamEvent,
|
18
|
+
BetaRedactedThinkingBlock,
|
19
|
+
BetaSignatureDelta,
|
20
|
+
BetaTextBlock,
|
21
|
+
BetaTextDelta,
|
22
|
+
BetaThinkingBlock,
|
23
|
+
BetaThinkingDelta,
|
24
|
+
BetaToolUseBlock,
|
25
|
+
)
|
26
|
+
|
27
|
+
from letta.log import get_logger
|
28
|
+
from letta.schemas.letta_message import (
|
29
|
+
ApprovalRequestMessage,
|
30
|
+
AssistantMessage,
|
31
|
+
HiddenReasoningMessage,
|
32
|
+
LettaMessage,
|
33
|
+
ReasoningMessage,
|
34
|
+
ToolCallDelta,
|
35
|
+
ToolCallMessage,
|
36
|
+
)
|
37
|
+
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
38
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
39
|
+
from letta.schemas.message import Message
|
40
|
+
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
41
|
+
from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
|
42
|
+
|
43
|
+
logger = get_logger(__name__)
|
44
|
+
|
45
|
+
|
46
|
+
# TODO: These modes aren't used right now - but can be useful if we do multiple sequential tool calling within one Claude message
|
47
|
+
class EventMode(Enum):
    """Kind of Anthropic content block that is currently being streamed.

    Each member's value is identical to its name.
    """

    # Plain assistant text block.
    TEXT = "TEXT"
    # Tool invocation block (arguments arrive as partial JSON deltas).
    TOOL_USE = "TOOL_USE"
    # Native reasoning ("thinking") block.
    THINKING = "THINKING"
    # Reasoning block whose content is redacted.
    REDACTED_THINKING = "REDACTED_THINKING"
|
52
|
+
|
53
|
+
|
54
|
+
# TODO: There's a duplicate version of this in anthropic_streaming_interface
|
55
|
+
class SimpleAnthropicStreamingInterface:
    """
    A simpler version of AnthropicStreamingInterface focused on streaming assistant text,
    reasoning and tool call deltas, with support for parallel tool calling.

    Notes:
    - Text, reasoning and tool-call deltas are yielded as soon as they are received.
    - Every tool_use content block is tracked by its stream index. The partial JSON
      deltas received for that index are accumulated and parsed when the block's stop
      event arrives, producing one finalized ToolCall per block.
    - Multiple tool calls within a single message (parallel tool use) are collected and
      can be returned to the agent as a list via get_tool_call_objects().
    """

    def __init__(
        self,
        requires_approval_tools: list | None = None,
        run_id: str | None = None,
        step_id: str | None = None,
    ):
        """Initialize the per-message streaming state.

        Args:
            requires_approval_tools: Names of tools whose calls are emitted as
                ApprovalRequestMessage instead of ToolCallMessage. Defaults to no tools.
            run_id: Run identifier attached to every emitted message.
            step_id: Step identifier attached to every emitted message.
        """
        self.json_parser: JSONParser = PydanticJSONParser()
        self.run_id = run_id
        self.step_id = step_id

        # Premake IDs for database writes
        self.letta_message_id = Message.generate_id()

        self.anthropic_mode = None
        self.message_id = None
        self.accumulated_inner_thoughts = []
        self.tool_call_id = None
        self.tool_call_name = None
        self.accumulated_tool_call_args = ""
        self.previous_parse = {}

        # usage trackers
        self.input_tokens = 0
        self.output_tokens = 0
        self.model = None

        # reasoning object trackers
        self.reasoning_messages = []

        # assistant object trackers
        self.assistant_messages: list[AssistantMessage] = []

        # Buffer to hold tool call messages until inner thoughts are complete
        self.tool_call_buffer = []
        self.inner_thoughts_complete = False

        # Buffer to handle partial XML tags across chunks
        self.partial_tag_buffer = ""

        # A None default avoids sharing one mutable list between all instances.
        self.requires_approval_tools = requires_approval_tools if requires_approval_tools is not None else []
        # Collected finalized tool calls (supports parallel tool use)
        self.collected_tool_calls: list[ToolCall] = []
        # Track active tool_use blocks by stream index for parallel tool calling
        # { index: {"id": str, "name": str, "args": str} }
        self.active_tool_uses: dict[int, dict[str, str]] = {}
        # Maintain start order and indexed collection for stable ordering
        self._tool_use_start_order: list[int] = []
        self._collected_indexed: list[tuple[int, ToolCall]] = []

    def get_tool_call_objects(self) -> list[ToolCall]:
        """Return all finalized tool calls collected during this message (parallel supported).

        When indexed tool calls are available they are ordered by the position at which
        their content block started; a block index that was never recorded in the start
        order is ranked by its raw stream index. Otherwise the raw collection list is
        returned as-is.
        """
        if not self._collected_indexed:
            return self.collected_tool_calls

        # Map each block index to its first position in the start order once, instead of
        # scanning the list for every comparison key.
        start_position: dict[int, int] = {}
        for position, block_index in enumerate(self._tool_use_start_order):
            start_position.setdefault(block_index, position)

        ordered = sorted(self._collected_indexed, key=lambda entry: start_position.get(entry[0], entry[0]))
        return [call for _, call in ordered]

    # This exists for legacy compatibility
    def get_tool_call_object(self) -> Optional[ToolCall]:
        """Return the first finalized tool call, or None when there are none."""
        tool_calls = self.get_tool_call_objects()
        if tool_calls:
            return tool_calls[0]
        return None

    def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
        """Merge the streamed chunks into one content object per run of same-typed messages.

        Consecutive messages of the same kind (reasoning, redacted reasoning, or assistant
        text) recorded in self.reasoning_messages are concatenated into a single content
        object; the relative order of the runs is preserved.

        Raises:
            ValueError: If a recorded message is of an unexpected type.
        """

        def _process_group(
            group: list[ReasoningMessage | HiddenReasoningMessage | AssistantMessage],
            group_type: str,
        ) -> TextContent | ReasoningContent | RedactedReasoningContent:
            if group_type == "reasoning":
                reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
                is_native = any(chunk.source == "reasoner_model" for chunk in group)
                signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
                if is_native:
                    return ReasoningContent(is_native=is_native, reasoning=reasoning_text, signature=signature)
                return TextContent(text=reasoning_text)
            if group_type == "redacted":
                redacted_text = "".join(chunk.hidden_reasoning for chunk in group if chunk.hidden_reasoning is not None)
                return RedactedReasoningContent(data=redacted_text)
            if group_type == "text":
                # A chunk's content is either a list of parts exposing `.text` or a plain string.
                text = "".join(
                    "".join(part.text for part in chunk.content) if isinstance(chunk.content, list) else chunk.content
                    for chunk in group
                )
                return TextContent(text=text)
            raise ValueError("Unexpected group type")

        merged = []
        current_group = []
        current_group_type = None  # "reasoning", "redacted" or "text"

        for msg in self.reasoning_messages:
            # Determine the type of the current message
            if isinstance(msg, HiddenReasoningMessage):
                msg_type = "redacted"
            elif isinstance(msg, ReasoningMessage):
                msg_type = "reasoning"
            elif isinstance(msg, AssistantMessage):
                msg_type = "text"
            else:
                raise ValueError("Unexpected message type")

            # Initialize group type if not set
            if current_group_type is None:
                current_group_type = msg_type

            # If the type changes, flush the group collected so far
            if msg_type != current_group_type:
                merged.append(_process_group(current_group, current_group_type))
                current_group = []
                current_group_type = msg_type

            current_group.append(msg)

        # Process the final group, if any.
        if current_group:
            merged.append(_process_group(current_group, current_group_type))

        return merged

    def get_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
        """Return the merged message content (alias of get_reasoning_content)."""
        return self.get_reasoning_content()

    async def process(
        self,
        stream: AsyncStream[BetaRawMessageStreamEvent],
        ttft_span: Optional["Span"] = None,
    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
        """Consume the Anthropic stream and yield the Letta messages derived from it.

        Args:
            stream: The Anthropic event stream; it is entered as an async context manager.
            ttft_span: Optional span that receives a "stop_reason" event when processing fails.

        Yields:
            The messages produced for each event. If processing fails, a LettaStopReason
            with the error stop reason is yielded before the exception is re-raised.
        """
        prev_message_type = None
        message_index = 0
        event = None
        try:
            async with stream:
                async for event in stream:
                    try:
                        async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
                            new_message_type = message.message_type
                            if new_message_type != prev_message_type:
                                # The first message keeps index 0; every later type change advances it.
                                if prev_message_type is not None:
                                    message_index += 1
                                prev_message_type = new_message_type
                            yield message
                    except asyncio.CancelledError as e:
                        import traceback

                        logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
                        # NOTE(review): the same event is processed again from the start, so any
                        # state _process_event already mutated for it is applied a second time.
                        async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
                            new_message_type = message.message_type
                            if new_message_type != prev_message_type:
                                if prev_message_type is not None:
                                    message_index += 1
                                prev_message_type = new_message_type
                            yield message

                        # Don't raise the exception here
                        continue

        except Exception as e:
            import traceback

            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
            if ttft_span:
                ttft_span.add_event(
                    name="stop_reason",
                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
                )
            yield LettaStopReason(stop_reason=StopReasonType.error)
            raise
        finally:
            logger.info("AnthropicStreamingInterface: Stream processing complete.")

    def _build_tool_call_message(
        self,
        name,
        call_id,
        prev_message_type: Optional[str],
        message_index: int,
        arguments: Optional[str] = None,
    ):
        """Build the streamed message for a tool call start or argument delta.

        Tools listed in self.requires_approval_tools produce an ApprovalRequestMessage;
        all others produce a ToolCallMessage. `arguments` is only set on the
        ToolCallDelta when provided, so start-of-block messages carry no placeholder.
        """
        if name in self.requires_approval_tools:
            message_cls, own_type = ApprovalRequestMessage, "approval_request_message"
        else:
            message_cls, own_type = ToolCallMessage, "tool_call_message"

        if prev_message_type and prev_message_type != own_type:
            message_index += 1

        delta_fields = {"name": name, "tool_call_id": call_id}
        if arguments is not None:
            delta_fields["arguments"] = arguments

        # NOTE(review): other streaming interfaces in this package also pass `tool_calls=`
        # to ToolCallMessage; confirm whether this interface should do the same.
        return message_cls(
            id=self.letta_message_id,
            tool_call=ToolCallDelta(**delta_fields),
            date=datetime.now(timezone.utc).isoformat(),
            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
            run_id=self.run_id,
            step_id=self.step_id,
        )

    def _parse_tool_args(self, raw_args: str):
        """Parse the accumulated argument JSON of one tool_use block.

        Strict json.loads is tried first, then the permissive self.json_parser. Empty
        input, or input neither parser accepts, yields an empty dict.
        """
        if not raw_args:
            return {}
        try:
            return json.loads(raw_args)
        except json.JSONDecodeError:
            try:
                return self.json_parser.parse(raw_args)
            except Exception:
                # Still best-effort, but no longer silent: the call proceeds with empty arguments.
                logger.warning("Failed to parse streamed tool call arguments (%d chars); using empty arguments", len(raw_args))
                return {}

    async def _process_event(
        self,
        event: BetaRawMessageStreamEvent,
        ttft_span: Optional["Span"] = None,
        prev_message_type: Optional[str] = None,
        message_index: int = 0,
    ) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
        """Process a single event from the Anthropic stream and yield any resulting messages.

        Args:
            event: The event to process
            ttft_span: Accepted for signature compatibility; not used by this method.
            prev_message_type: Message type of the previously yielded message, if any.
            message_index: Index used to derive the otid of emitted messages; it is
                advanced locally when the emitted message type differs from prev_message_type.

        Yields:
            Messages generated from processing this event

        Raises:
            RuntimeError: If a delta arrives while the interface is not in the matching EventMode.
        """
        if isinstance(event, BetaRawContentBlockStartEvent):
            content = event.content_block

            if isinstance(content, BetaTextBlock):
                self.anthropic_mode = EventMode.TEXT
                # TODO: Can capture citations, etc.

            elif isinstance(content, BetaToolUseBlock):
                # New tool_use block started at this index
                self.anthropic_mode = EventMode.TOOL_USE
                self.active_tool_uses[event.index] = {"id": content.id, "name": content.name, "args": ""}
                if event.index not in self._tool_use_start_order:
                    self._tool_use_start_order.append(event.index)

                # Emit an initial tool call delta for this new block.
                # Do not emit placeholder arguments here to avoid UI duplicates.
                yield self._build_tool_call_message(content.name, content.id, prev_message_type, message_index)

            elif isinstance(content, BetaThinkingBlock):
                self.anthropic_mode = EventMode.THINKING
                # TODO: Can capture signature, etc.

            elif isinstance(content, BetaRedactedThinkingBlock):
                self.anthropic_mode = EventMode.REDACTED_THINKING

                if prev_message_type and prev_message_type != "hidden_reasoning_message":
                    message_index += 1

                hidden_reasoning_message = HiddenReasoningMessage(
                    id=self.letta_message_id,
                    state="redacted",
                    hidden_reasoning=content.data,
                    date=datetime.now(timezone.utc).isoformat(),
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )

                self.reasoning_messages.append(hidden_reasoning_message)
                yield hidden_reasoning_message

        elif isinstance(event, BetaRawContentBlockDeltaEvent):
            delta = event.delta

            if isinstance(delta, BetaTextDelta):
                # Safety check
                if self.anthropic_mode != EventMode.TEXT:
                    raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")

                if prev_message_type and prev_message_type != "assistant_message":
                    message_index += 1

                assistant_msg = AssistantMessage(
                    id=self.letta_message_id,
                    content=delta.text,
                    date=datetime.now(timezone.utc).isoformat(),
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )
                # Assistant text is recorded alongside reasoning so get_reasoning_content()
                # returns both in stream order.
                self.reasoning_messages.append(assistant_msg)
                yield assistant_msg

            elif isinstance(delta, BetaInputJSONDelta):
                # Append partial JSON for the specific tool_use block at this index
                if self.anthropic_mode != EventMode.TOOL_USE:
                    raise RuntimeError(
                        f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
                    )

                ctx = self.active_tool_uses.get(event.index)
                if ctx is None:
                    # Defensive: initialize if missing
                    ctx = {"id": self.tool_call_id or "", "name": self.tool_call_name or "", "args": ""}
                    self.active_tool_uses[event.index] = ctx

                if not delta.partial_json:
                    # Skip streaming a no-op delta to prevent duplicate placeholders in UI
                    return

                ctx["args"] += delta.partial_json

                yield self._build_tool_call_message(
                    ctx.get("name"),
                    ctx.get("id"),
                    prev_message_type,
                    message_index,
                    arguments=delta.partial_json,
                )

            elif isinstance(delta, BetaThinkingDelta):
                # Safety check
                if self.anthropic_mode != EventMode.THINKING:
                    raise RuntimeError(
                        f"Streaming integrity failed - received BetaThinkingDelta object while not in THINKING EventMode: {delta}"
                    )

                if prev_message_type and prev_message_type != "reasoning_message":
                    message_index += 1
                reasoning_message = ReasoningMessage(
                    id=self.letta_message_id,
                    source="reasoner_model",
                    reasoning=delta.thinking,
                    date=datetime.now(timezone.utc).isoformat(),
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )
                self.reasoning_messages.append(reasoning_message)
                yield reasoning_message

            elif isinstance(delta, BetaSignatureDelta):
                # Safety check
                if self.anthropic_mode != EventMode.THINKING:
                    raise RuntimeError(
                        f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
                    )

                if prev_message_type and prev_message_type != "reasoning_message":
                    message_index += 1
                # The signature travels on a reasoning message with empty text.
                reasoning_message = ReasoningMessage(
                    id=self.letta_message_id,
                    source="reasoner_model",
                    reasoning="",
                    date=datetime.now(timezone.utc).isoformat(),
                    signature=delta.signature,
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )
                self.reasoning_messages.append(reasoning_message)
                yield reasoning_message

        elif isinstance(event, BetaRawMessageStartEvent):
            self.message_id = event.message.id
            self.input_tokens += event.message.usage.input_tokens
            self.output_tokens += event.message.usage.output_tokens
            self.model = event.message.model

        elif isinstance(event, BetaRawMessageDeltaEvent):
            self.output_tokens += event.usage.output_tokens

        elif isinstance(event, BetaRawMessageStopEvent):
            # Don't do anything here! We don't want to stop the stream.
            pass

        elif isinstance(event, BetaRawContentBlockStopEvent):
            # Finalize the tool_use block at this index using accumulated deltas
            ctx = self.active_tool_uses.pop(event.index, None)
            if ctx is not None and ctx.get("id") and ctx.get("name") is not None:
                arguments = json.dumps(self._parse_tool_args(ctx.get("args", "")))
                finalized = ToolCall(id=ctx["id"], function=FunctionCall(arguments=arguments, name=ctx["name"]))
                # Keep both raw list and indexed list for compatibility
                self.collected_tool_calls.append(finalized)
                self._collected_indexed.append((event.index, finalized))

            # Reset mode when a content block ends
            self.anthropic_mode = None
|
@@ -279,9 +279,11 @@ class AnthropicStreamingInterface:
|
|
279
279
|
if prev_message_type and prev_message_type != "tool_call_message":
|
280
280
|
message_index += 1
|
281
281
|
if self.tool_call_name not in self.requires_approval_tools:
|
282
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
|
282
283
|
tool_call_msg = ToolCallMessage(
|
283
284
|
id=self.letta_message_id,
|
284
|
-
tool_call=
|
285
|
+
tool_call=tool_call_delta,
|
286
|
+
tool_calls=tool_call_delta,
|
285
287
|
date=datetime.now(timezone.utc).isoformat(),
|
286
288
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
287
289
|
run_id=self.run_id,
|
@@ -423,15 +425,17 @@ class AnthropicStreamingInterface:
|
|
423
425
|
tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
|
424
426
|
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
|
425
427
|
|
428
|
+
tool_call_delta = ToolCallDelta(
|
429
|
+
name=self.tool_call_name,
|
430
|
+
tool_call_id=self.tool_call_id,
|
431
|
+
arguments=tool_call_args,
|
432
|
+
)
|
426
433
|
tool_call_msg = ToolCallMessage(
|
427
434
|
id=self.tool_call_buffer[0].id,
|
428
435
|
otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
|
429
436
|
date=self.tool_call_buffer[0].date,
|
430
|
-
tool_call=
|
431
|
-
|
432
|
-
tool_call_id=self.tool_call_id,
|
433
|
-
arguments=tool_call_args,
|
434
|
-
),
|
437
|
+
tool_call=tool_call_delta,
|
438
|
+
tool_calls=tool_call_delta,
|
435
439
|
run_id=self.run_id,
|
436
440
|
)
|
437
441
|
prev_message_type = tool_call_msg.message_type
|
@@ -467,9 +471,13 @@ class AnthropicStreamingInterface:
|
|
467
471
|
run_id=self.run_id,
|
468
472
|
)
|
469
473
|
else:
|
474
|
+
tool_call_delta = ToolCallDelta(
|
475
|
+
name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
|
476
|
+
)
|
470
477
|
tool_call_msg = ToolCallMessage(
|
471
478
|
id=self.letta_message_id,
|
472
|
-
tool_call=
|
479
|
+
tool_call=tool_call_delta,
|
480
|
+
tool_calls=tool_call_delta,
|
473
481
|
date=datetime.now(timezone.utc).isoformat(),
|
474
482
|
run_id=self.run_id,
|
475
483
|
)
|
@@ -778,9 +786,11 @@ class SimpleAnthropicStreamingInterface:
|
|
778
786
|
else:
|
779
787
|
if prev_message_type and prev_message_type != "tool_call_message":
|
780
788
|
message_index += 1
|
789
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
|
781
790
|
tool_call_msg = ToolCallMessage(
|
782
791
|
id=self.letta_message_id,
|
783
|
-
tool_call=
|
792
|
+
tool_call=tool_call_delta,
|
793
|
+
tool_calls=tool_call_delta,
|
784
794
|
date=datetime.now(timezone.utc).isoformat(),
|
785
795
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
786
796
|
run_id=self.run_id,
|
@@ -860,9 +870,11 @@ class SimpleAnthropicStreamingInterface:
|
|
860
870
|
else:
|
861
871
|
if prev_message_type and prev_message_type != "tool_call_message":
|
862
872
|
message_index += 1
|
873
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json)
|
863
874
|
tool_call_msg = ToolCallMessage(
|
864
875
|
id=self.letta_message_id,
|
865
|
-
tool_call=
|
876
|
+
tool_call=tool_call_delta,
|
877
|
+
tool_calls=tool_call_delta,
|
866
878
|
date=datetime.now(timezone.utc).isoformat(),
|
867
879
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
868
880
|
run_id=self.run_id,
|
@@ -273,15 +273,17 @@ class SimpleGeminiStreamingInterface:
|
|
273
273
|
else:
|
274
274
|
if prev_message_type and prev_message_type != "tool_call_message":
|
275
275
|
message_index += 1
|
276
|
+
tool_call_delta = ToolCallDelta(
|
277
|
+
name=name,
|
278
|
+
arguments=arguments_str,
|
279
|
+
tool_call_id=call_id,
|
280
|
+
)
|
276
281
|
yield ToolCallMessage(
|
277
282
|
id=self.letta_message_id,
|
278
283
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
279
284
|
date=datetime.now(timezone.utc),
|
280
|
-
tool_call=
|
281
|
-
|
282
|
-
arguments=arguments_str,
|
283
|
-
tool_call_id=call_id,
|
284
|
-
),
|
285
|
+
tool_call=tool_call_delta,
|
286
|
+
tool_calls=tool_call_delta,
|
285
287
|
run_id=self.run_id,
|
286
288
|
step_id=self.step_id,
|
287
289
|
)
|