letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +81 -0
- letta/adapters/letta_llm_request_adapter.py +113 -0
- letta/adapters/letta_llm_stream_adapter.py +171 -0
- letta/agents/agent_loop.py +23 -0
- letta/agents/base_agent.py +4 -1
- letta/agents/base_agent_v2.py +68 -0
- letta/agents/helpers.py +3 -5
- letta/agents/letta_agent.py +23 -12
- letta/agents/letta_agent_v2.py +1221 -0
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +1 -1
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +53 -12
- letta/functions/helpers.py +3 -2
- letta/functions/schema_generator.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +4 -2
- letta/groups/sleeptime_multi_agent_v3.py +233 -0
- letta/helpers/tool_rule_solver.py +4 -0
- letta/helpers/tpuf_client.py +607 -34
- letta/interfaces/anthropic_streaming_interface.py +74 -30
- letta/interfaces/openai_streaming_interface.py +80 -37
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/openai_client.py +45 -4
- letta/orm/agent.py +4 -1
- letta/orm/block.py +2 -0
- letta/orm/blocks_agents.py +1 -0
- letta/orm/group.py +1 -0
- letta/orm/source.py +8 -1
- letta/orm/sources_agents.py +2 -1
- letta/orm/step_metrics.py +10 -0
- letta/orm/tools_agents.py +5 -2
- letta/schemas/block.py +4 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +8 -0
- letta/schemas/letta_message.py +1 -1
- letta/schemas/letta_request.py +2 -2
- letta/schemas/mcp.py +9 -1
- letta/schemas/message.py +42 -2
- letta/schemas/providers/ollama.py +1 -1
- letta/schemas/providers.py +1 -2
- letta/schemas/source.py +6 -0
- letta/schemas/step_metrics.py +2 -0
- letta/server/rest_api/interface.py +34 -2
- letta/server/rest_api/json_parser.py +2 -0
- letta/server/rest_api/redis_stream_manager.py +2 -1
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +132 -170
- letta/server/rest_api/routers/v1/blocks.py +6 -0
- letta/server/rest_api/routers/v1/folders.py +25 -7
- letta/server/rest_api/routers/v1/groups.py +6 -0
- letta/server/rest_api/routers/v1/internal_templates.py +218 -12
- letta/server/rest_api/routers/v1/messages.py +14 -19
- letta/server/rest_api/routers/v1/runs.py +43 -28
- letta/server/rest_api/routers/v1/sources.py +25 -7
- letta/server/rest_api/routers/v1/tools.py +42 -0
- letta/server/rest_api/streaming_response.py +11 -2
- letta/server/server.py +9 -6
- letta/services/agent_manager.py +39 -59
- letta/services/agent_serialization_manager.py +26 -11
- letta/services/archive_manager.py +60 -9
- letta/services/block_manager.py +5 -0
- letta/services/file_processor/embedder/base_embedder.py +5 -0
- letta/services/file_processor/embedder/openai_embedder.py +4 -0
- letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
- letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
- letta/services/file_processor/file_processor.py +9 -7
- letta/services/group_manager.py +74 -11
- letta/services/mcp_manager.py +134 -28
- letta/services/message_manager.py +229 -125
- letta/services/passage_manager.py +2 -1
- letta/services/source_manager.py +23 -1
- letta/services/summarizer/summarizer.py +4 -1
- letta/services/tool_executor/core_tool_executor.py +2 -120
- letta/services/tool_executor/files_tool_executor.py +133 -8
- letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
- letta/services/tool_sandbox/local_sandbox.py +2 -2
- letta/services/tool_sandbox/modal_version_manager.py +2 -1
- letta/settings.py +6 -0
- letta/streaming_utils.py +29 -4
- letta/utils.py +106 -4
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
letta/interfaces/anthropic_streaming_interface.py
CHANGED
@@ -28,6 +28,7 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -59,7 +60,12 @@ class AnthropicStreamingInterface:
     and detection of tool call events.
     """

-    def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
+    def __init__(
+        self,
+        use_assistant_message: bool = False,
+        put_inner_thoughts_in_kwarg: bool = False,
+        requires_approval_tools: list = [],
+    ):
         self.json_parser: JSONParser = PydanticJSONParser()
         self.use_assistant_message = use_assistant_message

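The streaming interface now accepts a `requires_approval_tools` list, which switches emission from `ToolCallMessage` to `ApprovalRequestMessage` for the named tools. A minimal sketch of how a caller might wire this up (the constructor arguments are the ones added in this diff; the tool names and usage context are illustrative):

    from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface

    # Tools listed here are surfaced as approval requests instead of
    # being yielded as ordinary tool call messages (names are hypothetical).
    interface = AnthropicStreamingInterface(
        use_assistant_message=False,
        put_inner_thoughts_in_kwarg=False,
        requires_approval_tools=["delete_file", "send_payment"],
    )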
@@ -90,6 +96,8 @@ class AnthropicStreamingInterface:
         # Buffer to handle partial XML tags across chunks
         self.partial_tag_buffer = ""

+        self.requires_approval_tools = requires_approval_tools
+
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         if not self.tool_call_name:
@@ -98,15 +106,19 @@ class AnthropicStreamingInterface:
         try:
             tool_input = json.loads(self.accumulated_tool_call_args)
         except json.JSONDecodeError as e:
-            logger.warning(
-                f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
-                f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
-            )
-            raise e
+            # Attempt to use OptimisticJSONParser to handle incomplete/malformed JSON
+            try:
+                tool_input = self.json_parser.parse(self.accumulated_tool_call_args)
+            except:
+                logger.warning(
+                    f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
+                    f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
+                )
+                raise e
         if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
             arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
         else:
-            arguments = json.dumps(tool_input, indent=2)
+            arguments = str(json.dumps(tool_input, indent=2))
         return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))

     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
@@ -218,11 +230,12 @@ class AnthropicStreamingInterface:
         except Exception as e:
             import traceback

-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -256,13 +269,15 @@ class AnthropicStreamingInterface:
             self.inner_thoughts_complete = False

             if not self.use_assistant_message:
-                # Buffer the initial tool call message
-                tool_call_msg = ToolCallMessage(
-                    id=self.letta_message_id,
-                    tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
-                    date=datetime.now(timezone.utc).isoformat(),
-                )
-                self.tool_call_buffer.append(tool_call_msg)
+                # Only buffer the initial tool call message if it doesn't require approval
+                # For approval-required tools, we'll create the ApprovalRequestMessage later
+                if self.tool_call_name not in self.requires_approval_tools:
+                    tool_call_msg = ToolCallMessage(
+                        id=self.letta_message_id,
+                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                        date=datetime.now(timezone.utc).isoformat(),
+                    )
+                    self.tool_call_buffer.append(tool_call_msg)
         elif isinstance(content, BetaThinkingBlock):
             self.anthropic_mode = EventMode.THINKING
             # TODO: Can capture signature, etc.
@@ -353,11 +368,36 @@ class AnthropicStreamingInterface:
                     prev_message_type = reasoning_message.message_type
                     yield reasoning_message

-                # Check if inner thoughts are complete - if so, flush the buffer
+                # Check if inner thoughts are complete - if so, flush the buffer or create approval message
                 if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
                     self.inner_thoughts_complete = True
-
-                    if len(self.tool_call_buffer) > 0:
+
+                    # Check if this tool requires approval
+                    if self.tool_call_name in self.requires_approval_tools:
+                        # Create ApprovalRequestMessage directly (buffer should be empty)
+                        if prev_message_type and prev_message_type != "approval_request_message":
+                            message_index += 1
+
+                        # Strip out inner thoughts from arguments
+                        tool_call_args = self.accumulated_tool_call_args
+                        if current_inner_thoughts:
+                            tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                        approval_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            date=datetime.now(timezone.utc).isoformat(),
+                            name=self.tool_call_name,
+                            tool_call=ToolCallDelta(
+                                name=self.tool_call_name,
+                                tool_call_id=self.tool_call_id,
+                                arguments=tool_call_args,
+                            ),
+                        )
+                        prev_message_type = approval_msg.message_type
+                        yield approval_msg
+                    elif len(self.tool_call_buffer) > 0:
+                        # Flush buffered tool call messages for non-approval tools
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1

@@ -371,9 +411,6 @@ class AnthropicStreamingInterface:
                             id=self.tool_call_buffer[0].id,
                             otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
                             date=self.tool_call_buffer[0].date,
-                            name=self.tool_call_buffer[0].name,
-                            sender_id=self.tool_call_buffer[0].sender_id,
-                            step_id=self.tool_call_buffer[0].step_id,
                             tool_call=ToolCallDelta(
                                 name=self.tool_call_name,
                                 tool_call_id=self.tool_call_id,
@@ -404,11 +441,18 @@ class AnthropicStreamingInterface:
                     yield assistant_msg
                 else:
                     # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
-                    tool_call_msg = ToolCallMessage(
-                        id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
-                        date=datetime.now(timezone.utc).isoformat(),
-                    )
+                    if self.tool_call_name in self.requires_approval_tools:
+                        tool_call_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                    else:
+                        tool_call_msg = ToolCallMessage(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
                     if self.inner_thoughts_complete:
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -11,6 +11,7 @@ from letta.llm_api.openai_client import is_openai_reasoning_model
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -43,6 +44,7 @@ class OpenAIStreamingInterface:
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         put_inner_thoughts_in_kwarg: bool = True,
+        requires_approval_tools: list = [],
     ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
@@ -86,6 +88,8 @@ class OpenAIStreamingInterface:
         self.reasoning_messages = []
         self.emitted_hidden_reasoning = False  # Track if we've emitted hidden reasoning message

+        self.requires_approval_tools = requires_approval_tools
+
     def get_reasoning_content(self) -> list[TextContent | OmittedReasoningContent]:
         content = "".join(self.reasoning_messages).strip()

@@ -162,11 +166,12 @@ class OpenAIStreamingInterface:
         except Exception as e:
             import traceback

-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -274,16 +279,28 @@ class OpenAIStreamingInterface:
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
                         self.tool_call_name = str(self.function_name_buffer)
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            date=datetime.now(timezone.utc),
-                            tool_call=ToolCallDelta(
-                                name=self.function_name_buffer,
-                                arguments=None,
-                                tool_call_id=self.function_id_buffer,
-                            ),
-                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                        )
+                        if self.tool_call_name in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg

@@ -404,17 +421,30 @@ class OpenAIStreamingInterface:
                             combined_chunk = self.function_args_buffer + updates_main_json
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
-                            tool_call_msg = ToolCallMessage(
-                                id=self.letta_message_id,
-                                date=datetime.now(timezone.utc),
-                                tool_call=ToolCallDelta(
-                                    name=self.function_name_buffer,
-                                    arguments=combined_chunk,
-                                    tool_call_id=self.function_id_buffer,
-                                ),
-                                # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                            )
+                            if self.function_name_buffer in self.requires_approval_tools:
+                                tool_call_msg = ApprovalRequestMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=combined_chunk,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    # name=name,
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
+                            else:
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=combined_chunk,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    # name=name,
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
                             # clear buffer
@@ -424,17 +454,30 @@ class OpenAIStreamingInterface:
                         # If there's no buffer to clear, just output a new chunk with new data
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            date=datetime.now(timezone.utc),
-                            tool_call=ToolCallDelta(
-                                name=None,
-                                arguments=updates_main_json,
-                                tool_call_id=self.function_id_buffer,
-                            ),
-                            # name=name,
-                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                        )
+                        if self.function_name_buffer in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=None,
+                                    arguments=updates_main_json,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                # name=name,
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=None,
+                                    arguments=updates_main_json,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                # name=name,
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg
                         self.function_id_buffer = None
letta/llm_api/openai_client.py
CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import os
 from typing import List, Optional

@@ -319,13 +320,53 @@ class OpenAIClient(LLMClientBase):

     @trace_method
     async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
-        """Request embeddings given texts and embedding config"""
+        """Request embeddings given texts and embedding config with chunking and retry logic"""
+        if not inputs:
+            return []
+
         kwargs = self._prepare_client_kwargs_embedding(embedding_config)
         client = AsyncOpenAI(**kwargs)
-        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

-        # extract the embeddings from the response
-        return [e.embedding for e in response.data]
+        # track results by original index to maintain order
+        results = [None] * len(inputs)
+
+        # queue of (start_idx, chunk_inputs) to process
+        chunks_to_process = [(i, inputs[i : i + 2048]) for i in range(0, len(inputs), 2048)]
+
+        min_chunk_size = 256
+
+        while chunks_to_process:
+            tasks = []
+            task_metadata = []
+
+            for start_idx, chunk_inputs in chunks_to_process:
+                task = client.embeddings.create(model=embedding_config.embedding_model, input=chunk_inputs)
+                tasks.append(task)
+                task_metadata.append((start_idx, chunk_inputs))
+
+            task_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            failed_chunks = []
+            for (start_idx, chunk_inputs), result in zip(task_metadata, task_results):
+                if isinstance(result, Exception):
+                    # check if we can retry with smaller chunks
+                    if len(chunk_inputs) > min_chunk_size:
+                        # split chunk in half and queue for retry
+                        mid = len(chunk_inputs) // 2
+                        failed_chunks.append((start_idx, chunk_inputs[:mid]))
+                        failed_chunks.append((start_idx + mid, chunk_inputs[mid:]))
+                    else:
+                        # can't split further, re-raise the error
+                        logger.error(f"Failed to get embeddings for chunk starting at {start_idx} even with minimum size {min_chunk_size}")
+                        raise result
+                else:
+                    embeddings = [r.embedding for r in result.data]
+                    for i, embedding in enumerate(embeddings):
+                        results[start_idx + i] = embedding
+
+            chunks_to_process = failed_chunks
+
+        return results

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
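The new `request_embeddings` retries failed batches by halving them until a floor of 256 inputs, so one bad input range no longer fails the whole request. The same pattern can be sketched independently of the OpenAI client; in this sketch `embed_batch` is a hypothetical stand-in for any async batched call that may raise:

    import asyncio

    async def gather_with_halving(inputs, embed_batch, batch_size=2048, min_batch=256):
        # results[i] holds the embedding for inputs[i]; order is preserved
        results = [None] * len(inputs)
        pending = [(i, inputs[i : i + batch_size]) for i in range(0, len(inputs), batch_size)]
        while pending:
            outs = await asyncio.gather(
                *(embed_batch(chunk) for _, chunk in pending), return_exceptions=True
            )
            retry = []
            for (start, chunk), out in zip(pending, outs):
                if isinstance(out, Exception):
                    if len(chunk) <= min_batch:
                        raise out  # can't split further, give up
                    mid = len(chunk) // 2
                    # re-queue both halves with their original offsets
                    retry += [(start, chunk[:mid]), (start + mid, chunk[mid:])]
                else:
                    results[start : start + len(chunk)] = out
            pending = retry
        return results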
letta/orm/agent.py
CHANGED
@@ -34,7 +34,10 @@ if TYPE_CHECKING:
 class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin, TemplateMixin, AsyncAttrs):
     __tablename__ = "agents"
     __pydantic_model__ = PydanticAgentState
-    __table_args__ = (Index("ix_agents_created_at", "created_at", "id"),)
+    __table_args__ = (
+        Index("ix_agents_created_at", "created_at", "id"),
+        Index("ix_agents_organization_id", "organization_id"),
+    )

     # agent generates its own id
     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
letta/orm/block.py
CHANGED
@@ -24,6 +24,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
     __table_args__ = (
         UniqueConstraint("id", "label", name="unique_block_id_label"),
         Index("created_at_label_idx", "created_at", "label"),
+        Index("ix_block_label", "label"),
     )

     template_name: Mapped[Optional[str]] = mapped_column(
@@ -41,6 +42,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin

     # permissions of the agent
     read_only: Mapped[bool] = mapped_column(doc="whether the agent has read-only access to the block", default=False)
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the block will be hidden.")

     # history pointers / locking mechanisms
     current_history_entry_id: Mapped[Optional[str]] = mapped_column(
letta/orm/blocks_agents.py
CHANGED
@@ -20,6 +20,7 @@ class BlocksAgents(Base):
         UniqueConstraint("agent_id", "block_id", name="unique_agent_block"),
         Index("ix_blocks_agents_block_label_agent_id", "block_label", "agent_id"),
         Index("ix_blocks_block_label", "block_label"),
+        Index("ix_blocks_agents_block_id", "block_id"),
     )

     # unique agent + block label
letta/orm/group.py
CHANGED
@@ -24,6 +24,7 @@ class Group(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateMixin):
     min_message_buffer_length: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     turns_counter: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     last_processed_message_id: Mapped[Optional[str]] = mapped_column(nullable=True, doc="")
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the group will be hidden.")

     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="groups")
letta/orm/source.py
CHANGED
@@ -1,12 +1,13 @@
 from typing import TYPE_CHECKING, Optional

-from sqlalchemy import JSON, Index, UniqueConstraint
+from sqlalchemy import JSON, Enum, Index, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm.custom_columns import EmbeddingConfigColumn
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.source import Source as PydanticSource

 if TYPE_CHECKING:
@@ -30,3 +31,9 @@ class Source(SqlalchemyBase, OrganizationMixin):
     instructions: Mapped[str] = mapped_column(nullable=True, doc="instructions for how to use the source")
     embedding_config: Mapped[EmbeddingConfig] = mapped_column(EmbeddingConfigColumn, doc="Configuration settings for embedding.")
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the source.")
+    vector_db_provider: Mapped[VectorDBProvider] = mapped_column(
+        Enum(VectorDBProvider),
+        nullable=False,
+        default=VectorDBProvider.NATIVE,
+        doc="The vector database provider used for this source's passages",
+    )
letta/orm/sources_agents.py
CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String
+from sqlalchemy import ForeignKey, Index, String
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm.base import Base
@@ -8,6 +8,7 @@ class SourcesAgents(Base):
     """Agents can have zero to many sources"""

     __tablename__ = "sources_agents"
+    __table_args__ = (Index("ix_sources_agents_source_id", "source_id"),)

     agent_id: Mapped[String] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
     source_id: Mapped[String] = mapped_column(String, ForeignKey("sources.id", ondelete="CASCADE"), primary_key=True)
letta/orm/step_metrics.py
CHANGED
@@ -43,6 +43,16 @@ class StepMetrics(SqlalchemyBase, ProjectMixin, AgentMixin):
         nullable=True,
         doc="The unique identifier of the job",
     )
+    step_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the step in nanoseconds",
+    )
+    llm_request_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the LLM request in nanoseconds",
+    )
     llm_request_ns: Mapped[Optional[int]] = mapped_column(
         BigInteger,
         nullable=True,
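The new `step_start_ns` and `llm_request_start_ns` columns store start timestamps in nanoseconds, complementing the existing `llm_request_ns` duration column. A hedged sketch of how nanosecond timing fields like these are typically captured and turned into durations (the recorder class is illustrative, not the letta implementation):

    import time

    class StepTimer:
        def __init__(self):
            # monotonic nanosecond clock; immune to wall-clock adjustments
            self.step_start_ns = time.perf_counter_ns()

        def elapsed_ns(self) -> int:
            return time.perf_counter_ns() - self.step_start_ns

    timer = StepTimer()
    # ... run the step ...
    print(f"step took {timer.elapsed_ns() / 1e6:.2f} ms")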
letta/orm/tools_agents.py
CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String, UniqueConstraint
+from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm import Base
@@ -8,7 +8,10 @@ class ToolsAgents(Base):
     """Agents can have one or many tools associated with them."""

     __tablename__ = "tools_agents"
-    __table_args__ = (UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),)
+    __table_args__ = (
+        UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),
+        Index("ix_tools_agents_tool_id", "tool_id"),
+    )

     # Each agent must have unique tool names
     agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
letta/schemas/block.py
CHANGED
@@ -38,6 +38,10 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # metadata
     description: Optional[str] = Field(None, description="Description of the block.")
     metadata: Optional[dict] = Field({}, description="Metadata of the block.")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the block will be hidden.",
+    )

     # def __len__(self):
     #     return len(self.value)
letta/schemas/enums.py
CHANGED
letta/schemas/group.py
CHANGED
@@ -49,6 +49,10 @@ class Group(GroupBase):
         None,
         description="The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving.",
     )
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )

     @property
     def manager_config(self) -> ManagerConfig:
@@ -170,6 +174,10 @@ class GroupCreate(BaseModel):
     manager_config: ManagerConfigUnion = Field(RoundRobinManager(), description="")
     project_id: Optional[str] = Field(None, description="The associated project id.")
     shared_block_ids: List[str] = Field([], description="")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )


 class InternalTemplateGroupCreate(GroupCreate):
letta/schemas/letta_message.py
CHANGED
@@ -265,7 +265,7 @@ class ApprovalRequestMessage(LettaMessage):
     message_type: Literal[MessageType.approval_request_message] = Field(
         default=MessageType.approval_request_message, description="The type of the message."
     )
-    tool_call: ToolCall = Field(..., description="The tool call that has been requested by the llm to run")
+    tool_call: Union[ToolCall, ToolCallDelta] = Field(..., description="The tool call that has been requested by the llm to run")


 class ApprovalResponseMessage(LettaMessage):
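Because `ApprovalRequestMessage.tool_call` can now be either a complete `ToolCall` or a streamed `ToolCallDelta`, consumers should branch on the concrete type; in a delta, every field may still be `None`. A minimal defensive-handling sketch (import paths and field access assumed from their usage elsewhere in this diff):

    from letta.schemas.letta_message import ApprovalRequestMessage, ToolCall, ToolCallDelta

    def describe_approval(msg: ApprovalRequestMessage) -> str:
        tc = msg.tool_call
        if isinstance(tc, ToolCall):
            # fully-formed call: name and arguments are present
            return f"approve {tc.function.name}({tc.function.arguments})?"
        # streaming delta: fields may be partial or None
        name = tc.name or "<unknown tool>"
        return f"approve {name} (arguments still streaming)?"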
letta/schemas/letta_request.py
CHANGED
@@ -60,7 +60,7 @@ class LettaStreamingRequest(LettaRequest):
         description="Flag to determine if individual tokens should be streamed, rather than streaming per step.",
     )
     include_pings: bool = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
     )
     background: bool = Field(
@@ -94,7 +94,7 @@ class RetrieveStreamRequest(BaseModel):
         0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
     )
     include_pings: Optional[bool] = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
     )
     poll_interval: Optional[float] = Field(
letta/schemas/mcp.py
CHANGED
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union

 from pydantic import Field
@@ -175,3 +175,11 @@ class MCPOAuthSessionUpdate(BaseMCPOAuth):
     client_secret: Optional[str] = Field(None, description="OAuth client secret")
     redirect_uri: Optional[str] = Field(None, description="OAuth redirect URI")
     status: Optional[OAuthSessionStatus] = Field(None, description="Session status")
+
+
+class MCPServerResyncResult(LettaBase):
+    """Result of resyncing MCP server tools."""
+
+    deleted: List[str] = Field(default_factory=list, description="List of deleted tool names")
+    updated: List[str] = Field(default_factory=list, description="List of updated tool names")
+    added: List[str] = Field(default_factory=list, description="List of added tool names")
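`MCPServerResyncResult` is a plain response model, so it can be built incrementally while diffing the server's live tool list against what is stored. A small usage sketch (assuming `LettaBase` behaves like a standard pydantic v2 model; the tool names are illustrative):

    result = MCPServerResyncResult()
    result.deleted.append("old_search_tool")
    result.updated.append("fetch_page")
    result.added.append("new_summarize_tool")
    # default_factory=list ensures each instance gets its own lists
    print(result.model_dump())
    # {'deleted': ['old_search_tool'], 'updated': ['fetch_page'], 'added': ['new_summarize_tool']}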