letta-nightly 0.11.0.dev20250807104511__py3-none-any.whl → 0.11.0.dev20250808104456__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +2 -1
- letta/agents/letta_agent.py +215 -143
- letta/constants.py +4 -1
- letta/embeddings.py +6 -5
- letta/functions/function_sets/base.py +2 -2
- letta/functions/function_sets/files.py +22 -9
- letta/interfaces/anthropic_streaming_interface.py +291 -265
- letta/interfaces/openai_streaming_interface.py +270 -250
- letta/llm_api/anthropic.py +3 -10
- letta/llm_api/openai_client.py +6 -1
- letta/orm/__init__.py +1 -0
- letta/orm/step.py +14 -0
- letta/orm/step_metrics.py +71 -0
- letta/schemas/enums.py +9 -0
- letta/schemas/llm_config.py +8 -6
- letta/schemas/providers/lmstudio.py +2 -2
- letta/schemas/providers/ollama.py +42 -54
- letta/schemas/providers/openai.py +1 -1
- letta/schemas/step.py +6 -0
- letta/schemas/step_metrics.py +23 -0
- letta/schemas/tool_rule.py +10 -29
- letta/services/step_manager.py +179 -1
- letta/services/tool_executor/builtin_tool_executor.py +4 -1
- letta/services/tool_executor/core_tool_executor.py +2 -10
- letta/services/tool_executor/files_tool_executor.py +89 -40
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/RECORD +30 -28
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/entry_points.txt +0 -0
@@ -43,24 +43,37 @@ async def grep_files(
|
|
43
43
|
agent_state: "AgentState",
|
44
44
|
pattern: str,
|
45
45
|
include: Optional[str] = None,
|
46
|
-
context_lines: Optional[int] =
|
46
|
+
context_lines: Optional[int] = 1,
|
47
|
+
offset: Optional[int] = None,
|
47
48
|
) -> str:
|
48
49
|
"""
|
49
50
|
Searches file contents for pattern matches with surrounding context.
|
50
51
|
|
51
|
-
|
52
|
-
-
|
53
|
-
-
|
54
|
-
-
|
52
|
+
Results are paginated - shows 20 matches per call. The response includes:
|
53
|
+
- A summary of total matches and which files contain them
|
54
|
+
- The current page of matches (20 at a time)
|
55
|
+
- Instructions for viewing more matches using the offset parameter
|
56
|
+
|
57
|
+
Example usage:
|
58
|
+
First call: grep_files(pattern="TODO")
|
59
|
+
Next call: grep_files(pattern="TODO", offset=20) # Shows matches 21-40
|
60
|
+
|
61
|
+
Returns search results containing:
|
62
|
+
- Summary with total match count and file distribution
|
63
|
+
- List of files with match counts per file
|
64
|
+
- Current page of matches (up to 20)
|
65
|
+
- Navigation hint for next page if more matches exist
|
55
66
|
|
56
67
|
Args:
|
57
68
|
pattern (str): Keyword or regex pattern to search within file contents.
|
58
69
|
include (Optional[str]): Optional keyword or regex pattern to filter filenames to include in the search.
|
59
70
|
context_lines (Optional[int]): Number of lines of context to show before and after each match.
|
60
|
-
Equivalent to `-C` in grep_files. Defaults to
|
61
|
-
|
62
|
-
|
63
|
-
|
71
|
+
Equivalent to `-C` in grep_files. Defaults to 1.
|
72
|
+
offset (Optional[int]): Number of matches to skip before showing results. Used for pagination.
|
73
|
+
For example, offset=20 shows matches starting from the 21st match.
|
74
|
+
Use offset=0 (or omit) for first page, offset=20 for second page,
|
75
|
+
offset=40 for third page, etc. The tool will tell you the exact
|
76
|
+
offset to use for the next page.
|
64
77
|
"""
|
65
78
|
raise NotImplementedError("Tool not implemented. Please contact the Letta team.")
|
66
79
|
|
@@ -126,271 +126,6 @@ class AnthropicStreamingInterface:
|
|
126
126
|
logger.error("Error checking inner thoughts: %s", e)
|
127
127
|
raise
|
128
128
|
|
129
|
-
async def process(
|
130
|
-
self,
|
131
|
-
stream: AsyncStream[BetaRawMessageStreamEvent],
|
132
|
-
ttft_span: Optional["Span"] = None,
|
133
|
-
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
|
134
|
-
prev_message_type = None
|
135
|
-
message_index = 0
|
136
|
-
try:
|
137
|
-
async with stream:
|
138
|
-
async for event in stream:
|
139
|
-
# TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
|
140
|
-
if isinstance(event, BetaRawContentBlockStartEvent):
|
141
|
-
content = event.content_block
|
142
|
-
|
143
|
-
if isinstance(content, BetaTextBlock):
|
144
|
-
self.anthropic_mode = EventMode.TEXT
|
145
|
-
# TODO: Can capture citations, etc.
|
146
|
-
elif isinstance(content, BetaToolUseBlock):
|
147
|
-
self.anthropic_mode = EventMode.TOOL_USE
|
148
|
-
self.tool_call_id = content.id
|
149
|
-
self.tool_call_name = content.name
|
150
|
-
self.inner_thoughts_complete = False
|
151
|
-
|
152
|
-
if not self.use_assistant_message:
|
153
|
-
# Buffer the initial tool call message instead of yielding immediately
|
154
|
-
tool_call_msg = ToolCallMessage(
|
155
|
-
id=self.letta_message_id,
|
156
|
-
tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
|
157
|
-
date=datetime.now(timezone.utc).isoformat(),
|
158
|
-
)
|
159
|
-
self.tool_call_buffer.append(tool_call_msg)
|
160
|
-
elif isinstance(content, BetaThinkingBlock):
|
161
|
-
self.anthropic_mode = EventMode.THINKING
|
162
|
-
# TODO: Can capture signature, etc.
|
163
|
-
elif isinstance(content, BetaRedactedThinkingBlock):
|
164
|
-
self.anthropic_mode = EventMode.REDACTED_THINKING
|
165
|
-
if prev_message_type and prev_message_type != "hidden_reasoning_message":
|
166
|
-
message_index += 1
|
167
|
-
hidden_reasoning_message = HiddenReasoningMessage(
|
168
|
-
id=self.letta_message_id,
|
169
|
-
state="redacted",
|
170
|
-
hidden_reasoning=content.data,
|
171
|
-
date=datetime.now(timezone.utc).isoformat(),
|
172
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
173
|
-
)
|
174
|
-
self.reasoning_messages.append(hidden_reasoning_message)
|
175
|
-
prev_message_type = hidden_reasoning_message.message_type
|
176
|
-
yield hidden_reasoning_message
|
177
|
-
|
178
|
-
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
179
|
-
delta = event.delta
|
180
|
-
|
181
|
-
if isinstance(delta, BetaTextDelta):
|
182
|
-
# Safety check
|
183
|
-
if not self.anthropic_mode == EventMode.TEXT:
|
184
|
-
raise RuntimeError(
|
185
|
-
f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
|
186
|
-
)
|
187
|
-
|
188
|
-
# Combine buffer with current text to handle tags split across chunks
|
189
|
-
combined_text = self.partial_tag_buffer + delta.text
|
190
|
-
|
191
|
-
# Remove all occurrences of </thinking> tag
|
192
|
-
cleaned_text = combined_text.replace("</thinking>", "")
|
193
|
-
|
194
|
-
# Extract just the new content (without the buffer part)
|
195
|
-
if len(self.partial_tag_buffer) <= len(cleaned_text):
|
196
|
-
delta.text = cleaned_text[len(self.partial_tag_buffer) :]
|
197
|
-
else:
|
198
|
-
# Edge case: the tag was removed and now the text is shorter than the buffer
|
199
|
-
delta.text = ""
|
200
|
-
|
201
|
-
# Store the last 10 characters (or all if less than 10) for the next chunk
|
202
|
-
# This is enough to catch "</thinking" which is 10 characters
|
203
|
-
self.partial_tag_buffer = combined_text[-10:] if len(combined_text) > 10 else combined_text
|
204
|
-
self.accumulated_inner_thoughts.append(delta.text)
|
205
|
-
|
206
|
-
if prev_message_type and prev_message_type != "reasoning_message":
|
207
|
-
message_index += 1
|
208
|
-
reasoning_message = ReasoningMessage(
|
209
|
-
id=self.letta_message_id,
|
210
|
-
reasoning=self.accumulated_inner_thoughts[-1],
|
211
|
-
date=datetime.now(timezone.utc).isoformat(),
|
212
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
213
|
-
)
|
214
|
-
self.reasoning_messages.append(reasoning_message)
|
215
|
-
prev_message_type = reasoning_message.message_type
|
216
|
-
yield reasoning_message
|
217
|
-
|
218
|
-
elif isinstance(delta, BetaInputJSONDelta):
|
219
|
-
if not self.anthropic_mode == EventMode.TOOL_USE:
|
220
|
-
raise RuntimeError(
|
221
|
-
f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
|
222
|
-
)
|
223
|
-
|
224
|
-
self.accumulated_tool_call_args += delta.partial_json
|
225
|
-
current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
|
226
|
-
|
227
|
-
# Start detecting a difference in inner thoughts
|
228
|
-
previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
|
229
|
-
current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
|
230
|
-
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
231
|
-
|
232
|
-
if inner_thoughts_diff:
|
233
|
-
if prev_message_type and prev_message_type != "reasoning_message":
|
234
|
-
message_index += 1
|
235
|
-
reasoning_message = ReasoningMessage(
|
236
|
-
id=self.letta_message_id,
|
237
|
-
reasoning=inner_thoughts_diff,
|
238
|
-
date=datetime.now(timezone.utc).isoformat(),
|
239
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
240
|
-
)
|
241
|
-
self.reasoning_messages.append(reasoning_message)
|
242
|
-
prev_message_type = reasoning_message.message_type
|
243
|
-
yield reasoning_message
|
244
|
-
|
245
|
-
# Check if inner thoughts are complete - if so, flush the buffer
|
246
|
-
if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
|
247
|
-
self.inner_thoughts_complete = True
|
248
|
-
# Flush all buffered tool call messages
|
249
|
-
if len(self.tool_call_buffer) > 0:
|
250
|
-
if prev_message_type and prev_message_type != "tool_call_message":
|
251
|
-
message_index += 1
|
252
|
-
|
253
|
-
# Strip out the inner thoughts from the buffered tool call arguments before streaming
|
254
|
-
tool_call_args = ""
|
255
|
-
for buffered_msg in self.tool_call_buffer:
|
256
|
-
tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
|
257
|
-
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
|
258
|
-
|
259
|
-
tool_call_msg = ToolCallMessage(
|
260
|
-
id=self.tool_call_buffer[0].id,
|
261
|
-
otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
|
262
|
-
date=self.tool_call_buffer[0].date,
|
263
|
-
name=self.tool_call_buffer[0].name,
|
264
|
-
sender_id=self.tool_call_buffer[0].sender_id,
|
265
|
-
step_id=self.tool_call_buffer[0].step_id,
|
266
|
-
tool_call=ToolCallDelta(
|
267
|
-
name=self.tool_call_name,
|
268
|
-
tool_call_id=self.tool_call_id,
|
269
|
-
arguments=tool_call_args,
|
270
|
-
),
|
271
|
-
)
|
272
|
-
prev_message_type = tool_call_msg.message_type
|
273
|
-
yield tool_call_msg
|
274
|
-
self.tool_call_buffer = []
|
275
|
-
|
276
|
-
# Start detecting special case of "send_message"
|
277
|
-
if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
|
278
|
-
previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
279
|
-
current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
280
|
-
send_message_diff = current_send_message[len(previous_send_message) :]
|
281
|
-
|
282
|
-
# Only stream out if it's not an empty string
|
283
|
-
if send_message_diff:
|
284
|
-
if prev_message_type and prev_message_type != "assistant_message":
|
285
|
-
message_index += 1
|
286
|
-
assistant_msg = AssistantMessage(
|
287
|
-
id=self.letta_message_id,
|
288
|
-
content=[TextContent(text=send_message_diff)],
|
289
|
-
date=datetime.now(timezone.utc).isoformat(),
|
290
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
291
|
-
)
|
292
|
-
prev_message_type = assistant_msg.message_type
|
293
|
-
yield assistant_msg
|
294
|
-
else:
|
295
|
-
# Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
|
296
|
-
tool_call_msg = ToolCallMessage(
|
297
|
-
id=self.letta_message_id,
|
298
|
-
tool_call=ToolCallDelta(
|
299
|
-
name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
|
300
|
-
),
|
301
|
-
date=datetime.now(timezone.utc).isoformat(),
|
302
|
-
)
|
303
|
-
if self.inner_thoughts_complete:
|
304
|
-
if prev_message_type and prev_message_type != "tool_call_message":
|
305
|
-
message_index += 1
|
306
|
-
tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
|
307
|
-
prev_message_type = tool_call_msg.message_type
|
308
|
-
yield tool_call_msg
|
309
|
-
else:
|
310
|
-
self.tool_call_buffer.append(tool_call_msg)
|
311
|
-
|
312
|
-
# Set previous parse
|
313
|
-
self.previous_parse = current_parsed
|
314
|
-
elif isinstance(delta, BetaThinkingDelta):
|
315
|
-
# Safety check
|
316
|
-
if not self.anthropic_mode == EventMode.THINKING:
|
317
|
-
raise RuntimeError(
|
318
|
-
f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
|
319
|
-
)
|
320
|
-
|
321
|
-
if prev_message_type and prev_message_type != "reasoning_message":
|
322
|
-
message_index += 1
|
323
|
-
reasoning_message = ReasoningMessage(
|
324
|
-
id=self.letta_message_id,
|
325
|
-
source="reasoner_model",
|
326
|
-
reasoning=delta.thinking,
|
327
|
-
date=datetime.now(timezone.utc).isoformat(),
|
328
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
329
|
-
)
|
330
|
-
self.reasoning_messages.append(reasoning_message)
|
331
|
-
prev_message_type = reasoning_message.message_type
|
332
|
-
yield reasoning_message
|
333
|
-
elif isinstance(delta, BetaSignatureDelta):
|
334
|
-
# Safety check
|
335
|
-
if not self.anthropic_mode == EventMode.THINKING:
|
336
|
-
raise RuntimeError(
|
337
|
-
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
338
|
-
)
|
339
|
-
|
340
|
-
if prev_message_type and prev_message_type != "reasoning_message":
|
341
|
-
message_index += 1
|
342
|
-
reasoning_message = ReasoningMessage(
|
343
|
-
id=self.letta_message_id,
|
344
|
-
source="reasoner_model",
|
345
|
-
reasoning="",
|
346
|
-
date=datetime.now(timezone.utc).isoformat(),
|
347
|
-
signature=delta.signature,
|
348
|
-
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
349
|
-
)
|
350
|
-
self.reasoning_messages.append(reasoning_message)
|
351
|
-
prev_message_type = reasoning_message.message_type
|
352
|
-
yield reasoning_message
|
353
|
-
elif isinstance(event, BetaRawMessageStartEvent):
|
354
|
-
self.message_id = event.message.id
|
355
|
-
self.input_tokens += event.message.usage.input_tokens
|
356
|
-
self.output_tokens += event.message.usage.output_tokens
|
357
|
-
self.model = event.message.model
|
358
|
-
elif isinstance(event, BetaRawMessageDeltaEvent):
|
359
|
-
self.output_tokens += event.usage.output_tokens
|
360
|
-
elif isinstance(event, BetaRawMessageStopEvent):
|
361
|
-
# Don't do anything here! We don't want to stop the stream.
|
362
|
-
pass
|
363
|
-
elif isinstance(event, BetaRawContentBlockStopEvent):
|
364
|
-
# If we're exiting a tool use block and there are still buffered messages,
|
365
|
-
# we should flush them now
|
366
|
-
if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
|
367
|
-
for buffered_msg in self.tool_call_buffer:
|
368
|
-
yield buffered_msg
|
369
|
-
self.tool_call_buffer = []
|
370
|
-
|
371
|
-
self.anthropic_mode = None
|
372
|
-
except asyncio.CancelledError as e:
|
373
|
-
import traceback
|
374
|
-
|
375
|
-
logger.error("Cancelled stream %s: %s", e, traceback.format_exc())
|
376
|
-
ttft_span.add_event(
|
377
|
-
name="stop_reason",
|
378
|
-
attributes={"stop_reason": StopReasonType.cancelled.value, "error": str(e), "stacktrace": traceback.format_exc()},
|
379
|
-
)
|
380
|
-
raise e
|
381
|
-
except Exception as e:
|
382
|
-
import traceback
|
383
|
-
|
384
|
-
logger.error("Error processing stream: %s", e, traceback.format_exc())
|
385
|
-
ttft_span.add_event(
|
386
|
-
name="stop_reason",
|
387
|
-
attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
|
388
|
-
)
|
389
|
-
yield LettaStopReason(stop_reason=StopReasonType.error)
|
390
|
-
raise e
|
391
|
-
finally:
|
392
|
-
logger.info("AnthropicStreamingInterface: Stream processing complete.")
|
393
|
-
|
394
129
|
def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]:
|
395
130
|
def _process_group(
|
396
131
|
group: list[ReasoningMessage | HiddenReasoningMessage], group_type: str
|
@@ -445,3 +180,294 @@ class AnthropicStreamingInterface:
|
|
445
180
|
content.text = content.text[:cutoff]
|
446
181
|
|
447
182
|
return merged
|
183
|
+
|
184
|
+
async def process(
|
185
|
+
self,
|
186
|
+
stream: AsyncStream[BetaRawMessageStreamEvent],
|
187
|
+
ttft_span: Optional["Span"] = None,
|
188
|
+
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
|
189
|
+
prev_message_type = None
|
190
|
+
message_index = 0
|
191
|
+
event = None
|
192
|
+
try:
|
193
|
+
async with stream:
|
194
|
+
async for event in stream:
|
195
|
+
try:
|
196
|
+
async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
|
197
|
+
new_message_type = message.message_type
|
198
|
+
if new_message_type != prev_message_type:
|
199
|
+
if prev_message_type != None:
|
200
|
+
message_index += 1
|
201
|
+
prev_message_type = new_message_type
|
202
|
+
yield message
|
203
|
+
except asyncio.CancelledError as e:
|
204
|
+
import traceback
|
205
|
+
|
206
|
+
logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
|
207
|
+
async for message in self._process_event(event, ttft_span, prev_message_type, message_index):
|
208
|
+
new_message_type = message.message_type
|
209
|
+
if new_message_type != prev_message_type:
|
210
|
+
if prev_message_type != None:
|
211
|
+
message_index += 1
|
212
|
+
prev_message_type = new_message_type
|
213
|
+
yield message
|
214
|
+
|
215
|
+
# Don't raise the exception here
|
216
|
+
continue
|
217
|
+
|
218
|
+
except Exception as e:
|
219
|
+
import traceback
|
220
|
+
|
221
|
+
logger.error("Error processing stream: %s", e, traceback.format_exc())
|
222
|
+
ttft_span.add_event(
|
223
|
+
name="stop_reason",
|
224
|
+
attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
|
225
|
+
)
|
226
|
+
yield LettaStopReason(stop_reason=StopReasonType.error)
|
227
|
+
raise e
|
228
|
+
finally:
|
229
|
+
logger.info("AnthropicStreamingInterface: Stream processing complete.")
|
230
|
+
|
231
|
+
async def _process_event(
|
232
|
+
self,
|
233
|
+
event: BetaRawMessageStreamEvent,
|
234
|
+
ttft_span: Optional["Span"] = None,
|
235
|
+
prev_message_type: Optional[str] = None,
|
236
|
+
message_index: int = 0,
|
237
|
+
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
|
238
|
+
"""Process a single event from the Anthropic stream and yield any resulting messages.
|
239
|
+
|
240
|
+
Args:
|
241
|
+
event: The event to process
|
242
|
+
|
243
|
+
Yields:
|
244
|
+
Messages generated from processing this event
|
245
|
+
"""
|
246
|
+
if isinstance(event, BetaRawContentBlockStartEvent):
|
247
|
+
content = event.content_block
|
248
|
+
|
249
|
+
if isinstance(content, BetaTextBlock):
|
250
|
+
self.anthropic_mode = EventMode.TEXT
|
251
|
+
# TODO: Can capture citations, etc.
|
252
|
+
elif isinstance(content, BetaToolUseBlock):
|
253
|
+
self.anthropic_mode = EventMode.TOOL_USE
|
254
|
+
self.tool_call_id = content.id
|
255
|
+
self.tool_call_name = content.name
|
256
|
+
self.inner_thoughts_complete = False
|
257
|
+
|
258
|
+
if not self.use_assistant_message:
|
259
|
+
# Buffer the initial tool call message instead of yielding immediately
|
260
|
+
tool_call_msg = ToolCallMessage(
|
261
|
+
id=self.letta_message_id,
|
262
|
+
tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
|
263
|
+
date=datetime.now(timezone.utc).isoformat(),
|
264
|
+
)
|
265
|
+
self.tool_call_buffer.append(tool_call_msg)
|
266
|
+
elif isinstance(content, BetaThinkingBlock):
|
267
|
+
self.anthropic_mode = EventMode.THINKING
|
268
|
+
# TODO: Can capture signature, etc.
|
269
|
+
elif isinstance(content, BetaRedactedThinkingBlock):
|
270
|
+
self.anthropic_mode = EventMode.REDACTED_THINKING
|
271
|
+
if prev_message_type and prev_message_type != "hidden_reasoning_message":
|
272
|
+
message_index += 1
|
273
|
+
hidden_reasoning_message = HiddenReasoningMessage(
|
274
|
+
id=self.letta_message_id,
|
275
|
+
state="redacted",
|
276
|
+
hidden_reasoning=content.data,
|
277
|
+
date=datetime.now(timezone.utc).isoformat(),
|
278
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
279
|
+
)
|
280
|
+
self.reasoning_messages.append(hidden_reasoning_message)
|
281
|
+
prev_message_type = hidden_reasoning_message.message_type
|
282
|
+
yield hidden_reasoning_message
|
283
|
+
|
284
|
+
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
285
|
+
delta = event.delta
|
286
|
+
|
287
|
+
if isinstance(delta, BetaTextDelta):
|
288
|
+
# Safety check
|
289
|
+
if not self.anthropic_mode == EventMode.TEXT:
|
290
|
+
raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")
|
291
|
+
|
292
|
+
# Combine buffer with current text to handle tags split across chunks
|
293
|
+
combined_text = self.partial_tag_buffer + delta.text
|
294
|
+
|
295
|
+
# Remove all occurrences of </thinking> tag
|
296
|
+
cleaned_text = combined_text.replace("</thinking>", "")
|
297
|
+
|
298
|
+
# Extract just the new content (without the buffer part)
|
299
|
+
if len(self.partial_tag_buffer) <= len(cleaned_text):
|
300
|
+
delta.text = cleaned_text[len(self.partial_tag_buffer) :]
|
301
|
+
else:
|
302
|
+
# Edge case: the tag was removed and now the text is shorter than the buffer
|
303
|
+
delta.text = ""
|
304
|
+
|
305
|
+
# Store the last 10 characters (or all if less than 10) for the next chunk
|
306
|
+
# This is enough to catch "</thinking" which is 10 characters
|
307
|
+
self.partial_tag_buffer = combined_text[-10:] if len(combined_text) > 10 else combined_text
|
308
|
+
self.accumulated_inner_thoughts.append(delta.text)
|
309
|
+
|
310
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
311
|
+
message_index += 1
|
312
|
+
reasoning_message = ReasoningMessage(
|
313
|
+
id=self.letta_message_id,
|
314
|
+
reasoning=self.accumulated_inner_thoughts[-1],
|
315
|
+
date=datetime.now(timezone.utc).isoformat(),
|
316
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
317
|
+
)
|
318
|
+
self.reasoning_messages.append(reasoning_message)
|
319
|
+
prev_message_type = reasoning_message.message_type
|
320
|
+
yield reasoning_message
|
321
|
+
|
322
|
+
elif isinstance(delta, BetaInputJSONDelta):
|
323
|
+
if not self.anthropic_mode == EventMode.TOOL_USE:
|
324
|
+
raise RuntimeError(
|
325
|
+
f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
|
326
|
+
)
|
327
|
+
|
328
|
+
self.accumulated_tool_call_args += delta.partial_json
|
329
|
+
current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
|
330
|
+
|
331
|
+
# Start detecting a difference in inner thoughts
|
332
|
+
previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
|
333
|
+
current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
|
334
|
+
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
335
|
+
|
336
|
+
if inner_thoughts_diff:
|
337
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
338
|
+
message_index += 1
|
339
|
+
reasoning_message = ReasoningMessage(
|
340
|
+
id=self.letta_message_id,
|
341
|
+
reasoning=inner_thoughts_diff,
|
342
|
+
date=datetime.now(timezone.utc).isoformat(),
|
343
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
344
|
+
)
|
345
|
+
self.reasoning_messages.append(reasoning_message)
|
346
|
+
prev_message_type = reasoning_message.message_type
|
347
|
+
yield reasoning_message
|
348
|
+
|
349
|
+
# Check if inner thoughts are complete - if so, flush the buffer
|
350
|
+
if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
|
351
|
+
self.inner_thoughts_complete = True
|
352
|
+
# Flush all buffered tool call messages
|
353
|
+
if len(self.tool_call_buffer) > 0:
|
354
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
355
|
+
message_index += 1
|
356
|
+
|
357
|
+
# Strip out the inner thoughts from the buffered tool call arguments before streaming
|
358
|
+
tool_call_args = ""
|
359
|
+
for buffered_msg in self.tool_call_buffer:
|
360
|
+
tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
|
361
|
+
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
|
362
|
+
|
363
|
+
tool_call_msg = ToolCallMessage(
|
364
|
+
id=self.tool_call_buffer[0].id,
|
365
|
+
otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
|
366
|
+
date=self.tool_call_buffer[0].date,
|
367
|
+
name=self.tool_call_buffer[0].name,
|
368
|
+
sender_id=self.tool_call_buffer[0].sender_id,
|
369
|
+
step_id=self.tool_call_buffer[0].step_id,
|
370
|
+
tool_call=ToolCallDelta(
|
371
|
+
name=self.tool_call_name,
|
372
|
+
tool_call_id=self.tool_call_id,
|
373
|
+
arguments=tool_call_args,
|
374
|
+
),
|
375
|
+
)
|
376
|
+
prev_message_type = tool_call_msg.message_type
|
377
|
+
yield tool_call_msg
|
378
|
+
self.tool_call_buffer = []
|
379
|
+
|
380
|
+
# Start detecting special case of "send_message"
|
381
|
+
if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
|
382
|
+
previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
383
|
+
current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
384
|
+
send_message_diff = current_send_message[len(previous_send_message) :]
|
385
|
+
|
386
|
+
# Only stream out if it's not an empty string
|
387
|
+
if send_message_diff:
|
388
|
+
if prev_message_type and prev_message_type != "assistant_message":
|
389
|
+
message_index += 1
|
390
|
+
assistant_msg = AssistantMessage(
|
391
|
+
id=self.letta_message_id,
|
392
|
+
content=[TextContent(text=send_message_diff)],
|
393
|
+
date=datetime.now(timezone.utc).isoformat(),
|
394
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
395
|
+
)
|
396
|
+
prev_message_type = assistant_msg.message_type
|
397
|
+
yield assistant_msg
|
398
|
+
else:
|
399
|
+
# Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
|
400
|
+
tool_call_msg = ToolCallMessage(
|
401
|
+
id=self.letta_message_id,
|
402
|
+
tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
|
403
|
+
date=datetime.now(timezone.utc).isoformat(),
|
404
|
+
)
|
405
|
+
if self.inner_thoughts_complete:
|
406
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
407
|
+
message_index += 1
|
408
|
+
tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
|
409
|
+
prev_message_type = tool_call_msg.message_type
|
410
|
+
yield tool_call_msg
|
411
|
+
else:
|
412
|
+
self.tool_call_buffer.append(tool_call_msg)
|
413
|
+
|
414
|
+
# Set previous parse
|
415
|
+
self.previous_parse = current_parsed
|
416
|
+
elif isinstance(delta, BetaThinkingDelta):
|
417
|
+
# Safety check
|
418
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
419
|
+
raise RuntimeError(
|
420
|
+
f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
|
421
|
+
)
|
422
|
+
|
423
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
424
|
+
message_index += 1
|
425
|
+
reasoning_message = ReasoningMessage(
|
426
|
+
id=self.letta_message_id,
|
427
|
+
source="reasoner_model",
|
428
|
+
reasoning=delta.thinking,
|
429
|
+
date=datetime.now(timezone.utc).isoformat(),
|
430
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
431
|
+
)
|
432
|
+
self.reasoning_messages.append(reasoning_message)
|
433
|
+
prev_message_type = reasoning_message.message_type
|
434
|
+
yield reasoning_message
|
435
|
+
elif isinstance(delta, BetaSignatureDelta):
|
436
|
+
# Safety check
|
437
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
438
|
+
raise RuntimeError(
|
439
|
+
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
440
|
+
)
|
441
|
+
|
442
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
443
|
+
message_index += 1
|
444
|
+
reasoning_message = ReasoningMessage(
|
445
|
+
id=self.letta_message_id,
|
446
|
+
source="reasoner_model",
|
447
|
+
reasoning="",
|
448
|
+
date=datetime.now(timezone.utc).isoformat(),
|
449
|
+
signature=delta.signature,
|
450
|
+
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
451
|
+
)
|
452
|
+
self.reasoning_messages.append(reasoning_message)
|
453
|
+
prev_message_type = reasoning_message.message_type
|
454
|
+
yield reasoning_message
|
455
|
+
elif isinstance(event, BetaRawMessageStartEvent):
|
456
|
+
self.message_id = event.message.id
|
457
|
+
self.input_tokens += event.message.usage.input_tokens
|
458
|
+
self.output_tokens += event.message.usage.output_tokens
|
459
|
+
self.model = event.message.model
|
460
|
+
elif isinstance(event, BetaRawMessageDeltaEvent):
|
461
|
+
self.output_tokens += event.usage.output_tokens
|
462
|
+
elif isinstance(event, BetaRawMessageStopEvent):
|
463
|
+
# Don't do anything here! We don't want to stop the stream.
|
464
|
+
pass
|
465
|
+
elif isinstance(event, BetaRawContentBlockStopEvent):
|
466
|
+
# If we're exiting a tool use block and there are still buffered messages,
|
467
|
+
# we should flush them now
|
468
|
+
if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
|
469
|
+
for buffered_msg in self.tool_call_buffer:
|
470
|
+
yield buffered_msg
|
471
|
+
self.tool_call_buffer = []
|
472
|
+
|
473
|
+
self.anthropic_mode = None
|